Add spellcheck and word suggestion to the prediction service
Add suggestion strip to the keyboard service
Add default English dictionary
Add Android LatinIME/native/jni/src suggestion codebase to third_party/android_prediction
Modify keyboard client text display
R=anwilson@chromium.org, viettrungluu@chromium.org
Review URL: https://codereview.chromium.org/1247903003 .
diff --git a/.gitignore b/.gitignore
index ba6de3a..d695766 100644
--- a/.gitignore
+++ b/.gitignore
@@ -108,3 +108,6 @@
# breakpad tool
/mojo/tools/linux64/dump_syms
/mojo/tools/linux64/symupload
+
+# downloaded prediction dictionary files.
+/services/prediction/res/*.dict
diff --git a/DEPS b/DEPS
index 6602e37..b218357 100644
--- a/DEPS
+++ b/DEPS
@@ -423,6 +423,17 @@
'--no_auth',
'--bucket', 'mojo',
'-s', 'src/mojo/tools/linux64/symupload.sha1',
+ ],
+ },
+ # Pull prediction resources using checked-in hashes.
+ {
+ 'name': 'prediction_resources',
+ 'pattern': '',
+ 'action': [ 'download_from_google_storage',
+ '--no_resume',
+ '--no_auth',
+ '--bucket', 'mojo/prediction',
+ '-d', 'src/services/prediction/res',
],
},
]
diff --git a/examples/keyboard_client/keyboard_client.cc b/examples/keyboard_client/keyboard_client.cc
index a9ef5e1..e310323 100644
--- a/examples/keyboard_client/keyboard_client.cc
+++ b/examples/keyboard_client/keyboard_client.cc
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include <algorithm>
+
#include "base/bind.h"
#include "base/macros.h"
#include "mojo/application/application_runner_chromium.h"
@@ -219,16 +221,7 @@
void CommitText(const mojo::String& text,
int32_t new_cursor_position) override {
- std::string combined(text_[1]);
- combined.append(text);
- SkRect bounds;
- text_paint_.measureText(combined.data(), combined.size(), &bounds);
- if (bounds.width() > text_view_->bounds().width) {
- text_[0] = text_[1];
- text_[1] = text;
- } else {
- text_[1].append(text);
- }
+ text_.append(text);
DrawText();
}
@@ -236,10 +229,10 @@
int32_t after_length) override {
// treat negative and zero |before_length| values as no-op.
if (before_length > 0) {
- if (before_length > static_cast<int32_t>(text_[1].size())) {
- before_length = text_[1].size();
+ if (before_length > static_cast<int32_t>(text_.size())) {
+ before_length = text_.size();
}
- text_[1].erase(text_[1].end() - before_length, text_[1].end());
+ text_.erase(text_.end() - before_length, text_.end());
}
DrawText();
}
@@ -316,20 +309,63 @@
float row_height = text_view_height_ / 2.0f;
float text_baseline_offset = row_height / 5.0f;
+ if (!text_.empty()) {
+ SkRect sk_rect;
+ text_paint_.measureText((const void*)(text_.c_str()), text_.length(),
+ &sk_rect);
- if (!text_[0].empty()) {
- canvas->drawText(text_[0].data(), text_[0].size(), 0.0f,
- row_height - text_baseline_offset, text_paint_);
- }
+ if (sk_rect.width() > text_view_->bounds().width) {
+ std::string reverse_text = text_;
+ std::reverse(reverse_text.begin(), reverse_text.end());
- if (!text_[1].empty()) {
- canvas->drawText(text_[1].data(), text_[1].size(), 0.0f,
- (2.0f * row_height) - text_baseline_offset, text_paint_);
+ size_t processed1 = text_paint_.breakText(
+ (const void*)(reverse_text.c_str()), strlen(reverse_text.c_str()),
+ text_view_->bounds().width);
+ size_t processed2 = text_paint_.breakText(
+ (const void*)(reverse_text.c_str() + processed1),
+ strlen(reverse_text.c_str()) - processed1,
+ text_view_->bounds().width);
+ if (processed1 + processed2 < text_.length()) {
+ DrawSecondLine(canvas, text_.length() - processed1, processed1,
+ row_height, text_baseline_offset);
+ DrawFirstLine(canvas, text_.length() - processed1 - processed2,
+ processed2, row_height, text_baseline_offset);
+ } else {
+ size_t processed3 =
+ text_paint_.breakText((const void*)(text_.c_str()),
+ text_.length(), text_view_->bounds().width);
+ DrawFirstLine(canvas, 0, processed3, row_height,
+ text_baseline_offset);
+ DrawSecondLine(canvas, processed3, text_.length() - processed3,
+ row_height, text_baseline_offset);
+ }
+ } else {
+ DrawSecondLine(canvas, 0, text_.length(), row_height,
+ text_baseline_offset);
+ }
}
canvas->flush();
}
+ // Draws |length| bytes of |text_| from byte |offset| on the first text row.
+ void DrawFirstLine(SkCanvas* canvas, const size_t offset,
+                    const size_t length, const float row_height,
+                    const float text_baseline_offset) {
+   const float baseline_y = row_height - text_baseline_offset;
+   const char* first_byte = text_.data() + offset;
+   canvas->drawText(first_byte, length, 0.0f, baseline_y, text_paint_);
+ }
+
+ // Draws |length| bytes of |text_| from byte |offset| on the second text row.
+ void DrawSecondLine(SkCanvas* canvas, const size_t offset,
+                     const size_t length, const float row_height,
+                     const float text_baseline_offset) {
+   const float baseline_y = (2.0f * row_height) - text_baseline_offset;
+   const char* first_byte = text_.data() + offset;
+   canvas->drawText(first_byte, length, 0.0f, baseline_y, text_paint_);
+ }
+
void DrawRootView(SkCanvas* canvas) {
canvas->clear(SK_ColorDKGRAY);
canvas->flush();
@@ -352,7 +388,7 @@
scoped_ptr<ViewTextureUploader> text_view_texture_uploader_;
scoped_ptr<ViewTextureUploader> root_view_texture_uploader_;
int text_view_height_;
- std::string text_[2];
+ std::string text_;
SkPaint text_paint_;
base::WeakPtrFactory<KeyboardDelegate> weak_factory_;
diff --git a/mojo/services/prediction/public/interfaces/prediction.mojom b/mojo/services/prediction/public/interfaces/prediction.mojom
index 58fa8d2..e139724 100644
--- a/mojo/services/prediction/public/interfaces/prediction.mojom
+++ b/mojo/services/prediction/public/interfaces/prediction.mojom
@@ -5,19 +5,16 @@
[DartPackage="mojo_services"]
module prediction;
-struct Settings {
- bool correction_enabled;
- bool block_potentially_offensive;
- bool space_aware_gesture_enabled;
+struct PrevWordInfo {
+ string word;
+ bool is_beginning_of_sentence;
};
struct PredictionInfo {
- array<string> previous_words;
- string current_word;
+ array<PrevWordInfo> previous_words;
+ string current_word;
};
interface PredictionService {
- SetSettings(Settings settings);
-
GetPredictionList(PredictionInfo prediction_info) => (array<string>? prediction_list);
};
diff --git a/services/keyboard_native/BUILD.gn b/services/keyboard_native/BUILD.gn
index a52b517..7cf5260 100644
--- a/services/keyboard_native/BUILD.gn
+++ b/services/keyboard_native/BUILD.gn
@@ -21,6 +21,10 @@
"material_splash_animation.h",
"motion_decay_animation.cc",
"motion_decay_animation.h",
+ "predictor.cc",
+ "predictor.h",
+ "text_update_key.cc",
+ "text_update_key.h",
"time_interpolator.h",
"view_observer_delegate.cc",
"view_observer_delegate.h",
@@ -42,6 +46,7 @@
"//mojo/services/geometry/public/cpp",
"//mojo/services/geometry/public/interfaces",
"//mojo/services/keyboard/public/interfaces",
+ "//mojo/services/prediction/public/interfaces",
"//mojo/services/surfaces/public/cpp",
"//mojo/services/surfaces/public/interfaces",
"//mojo/services/surfaces/public/interfaces:surface_id",
diff --git a/services/keyboard_native/key_layout.cc b/services/keyboard_native/key_layout.cc
index a41eb6e..722e1ba 100644
--- a/services/keyboard_native/key_layout.cc
+++ b/services/keyboard_native/key_layout.cc
@@ -11,6 +11,8 @@
#include "services/keyboard_native/kLowerCaseIcon.h"
#include "services/keyboard_native/kUpperCaseIcon.h"
#include "services/keyboard_native/key_layout.h"
+#include "services/keyboard_native/predictor.h"
+#include "services/keyboard_native/text_update_key.h"
#include "skia/ext/refptr.h"
#include "third_party/skia/include/core/SkCanvas.h"
#include "third_party/skia/include/core/SkImageDecoder.h"
@@ -103,8 +105,10 @@
KeyLayout::KeyLayout()
: on_text_callback_(),
+ on_suggest_text_callback_(),
layout_(&letters_layout_),
key_map_(&lower_case_key_map_),
+ predictor_(nullptr),
weak_factory_(this) {
InitLayouts();
InitKeyMaps();
@@ -137,6 +141,11 @@
on_delete_callback_ = on_delete_callback;
}
+void KeyLayout::SetSuggestTextCallback(
+ base::Callback<void(const std::string&)> on_suggest_text_callback) {
+ on_suggest_text_callback_ = on_suggest_text_callback;
+}
+
void KeyLayout::SetKeyArea(const gfx::RectF& key_area) {
key_area_ = key_area;
}
@@ -174,7 +183,6 @@
key_index++) {
float key_width = static_cast<float>(key_area_.width()) *
(*layout_)[row_index][key_index];
-
(*key_map_)[row_index][key_index]->Draw(
canvas, text_paint,
gfx::RectF(current_left, current_top, key_width, row_height));
@@ -208,6 +216,11 @@
}
}
+void KeyLayout::SetPredictor(Predictor* predictor) {
+  predictor_ = predictor;
+  predictor_->SetSuggestionKeys((*key_map_)[0]);  // Strip row of active map.
+}
+
void KeyLayout::InitLayouts() {
// Row layouts are specified by a vector of floats which indicate the percent
// width a given key takes up in that row. The floats of a given row *MUST*
@@ -219,12 +232,15 @@
std::vector<float> seven_key_row_layout = {
0.15f, 0.1f, 0.1f, 0.3f, 0.1f, 0.1f, 0.15f};
std::vector<float> five_key_row_layout = {0.15f, 0.1f, 0.5f, 0.1f, 0.15f};
+ std::vector<float> three_key_row_layout = {0.33f, 0.33f, 0.33f};
+ letters_layout_.push_back(three_key_row_layout);
letters_layout_.push_back(ten_key_row_layout);
letters_layout_.push_back(nine_key_row_layout);
letters_layout_.push_back(nine_key_row_layout);
letters_layout_.push_back(five_key_row_layout);
+ symbols_layout_.push_back(three_key_row_layout);
symbols_layout_.push_back(ten_key_row_layout);
symbols_layout_.push_back(nine_key_row_layout);
symbols_layout_.push_back(nine_key_row_layout);
@@ -236,26 +252,40 @@
}
void KeyLayout::OnKeyEmitText(const TextKey& key) {
+ predictor_->StoreCurWord(std::string(key.ToText()));
on_text_callback_.Run(std::string(key.ToText()));
}
void KeyLayout::OnKeyDelete(const TextKey& key) {
+ predictor_->DeleteCharInCurWord();
on_delete_callback_.Run();
}
+void KeyLayout::OnSuggestKeyEmitText(const TextUpdateKey& key) {
+  // Copy the label now: ChooseSuggestedWord() blanks the suggestion keys.
+  const std::string chosen_word(key.ToText());
+  int pending_deletes = predictor_->ChooseSuggestedWord(chosen_word);
+  while (pending_deletes-- > 0)
+    on_delete_callback_.Run();  // Erase the partially typed word.
+  on_suggest_text_callback_.Run(chosen_word);
+}
+
void KeyLayout::OnKeySwitchToUpperCase(const TextKey& key) {
layout_ = &letters_layout_;
key_map_ = &upper_case_key_map_;
+ predictor_->SetSuggestionKeys(upper_case_key_map_[0]);
}
void KeyLayout::OnKeySwitchToLowerCase(const TextKey& key) {
layout_ = &letters_layout_;
key_map_ = &lower_case_key_map_;
+ predictor_->SetSuggestionKeys(lower_case_key_map_[0]);
}
void KeyLayout::OnKeySwitchToSymbols(const TextKey& key) {
layout_ = &symbols_layout_;
key_map_ = &symbols_key_map_;
+ predictor_->SetSuggestionKeys(symbols_key_map_[0]);
}
void KeyLayout::InitKeyMaps() {
@@ -265,6 +295,8 @@
base::Bind(&KeyLayout::OnKeyEmitText, weak_factory_.GetWeakPtr());
base::Callback<void(const TextKey&)> delete_callback =
base::Bind(&KeyLayout::OnKeyDelete, weak_factory_.GetWeakPtr());
+ base::Callback<void(const TextUpdateKey&)> suggest_emit_text_callback =
+ base::Bind(&KeyLayout::OnSuggestKeyEmitText, weak_factory_.GetWeakPtr());
base::Callback<void(const TextKey&)> switch_to_upper_case_callback =
base::Bind(&KeyLayout::OnKeySwitchToUpperCase,
weak_factory_.GetWeakPtr());
@@ -283,6 +315,21 @@
ImageKey* action_image_key =
new ImageKey(":)", do_nothing_callback, keyboard_native::kActionIcon);
+ std::vector<Key*> suggestion_strip_key_map_row_lower = {
+ new TextUpdateKey("", suggest_emit_text_callback),
+ new TextUpdateKey("", suggest_emit_text_callback),
+ new TextUpdateKey("", suggest_emit_text_callback)};
+
+ std::vector<Key*> suggestion_strip_key_map_row_upper = {
+ new TextUpdateKey("", suggest_emit_text_callback),
+ new TextUpdateKey("", suggest_emit_text_callback),
+ new TextUpdateKey("", suggest_emit_text_callback)};
+
+ std::vector<Key*> suggestion_strip_key_map_row_symbol = {
+ new TextUpdateKey("", suggest_emit_text_callback),
+ new TextUpdateKey("", suggest_emit_text_callback),
+ new TextUpdateKey("", suggest_emit_text_callback)};
+
std::vector<Key*> lower_case_key_map_row_one = {
new TextKey("q", emit_text_callback),
new TextKey("w", emit_text_callback),
@@ -324,7 +371,8 @@
new TextKey(".", emit_text_callback),
action_image_key};
- lower_case_key_map_ = {lower_case_key_map_row_one,
+ lower_case_key_map_ = {suggestion_strip_key_map_row_lower,
+ lower_case_key_map_row_one,
lower_case_key_map_row_two,
lower_case_key_map_row_three,
lower_case_key_map_row_four};
@@ -370,7 +418,8 @@
new TextKey(".", emit_text_callback),
action_image_key};
- upper_case_key_map_ = {upper_case_key_map_row_one,
+ upper_case_key_map_ = {suggestion_strip_key_map_row_upper,
+ upper_case_key_map_row_one,
upper_case_key_map_row_two,
upper_case_key_map_row_three,
upper_case_key_map_row_four};
@@ -418,10 +467,11 @@
new TextKey(".", emit_text_callback),
action_image_key};
- symbols_key_map_ = {symbols_key_map_row_one,
+ symbols_key_map_ = {suggestion_strip_key_map_row_symbol,
+ symbols_key_map_row_one,
symbols_key_map_row_two,
symbols_key_map_row_three,
symbols_key_map_row_four};
}
-}
-// namespace keyboard
+
+} // namespace keyboard
diff --git a/services/keyboard_native/key_layout.h b/services/keyboard_native/key_layout.h
index cd002c2..ccc33ed 100644
--- a/services/keyboard_native/key_layout.h
+++ b/services/keyboard_native/key_layout.h
@@ -22,6 +22,9 @@
namespace keyboard {
+class Predictor;
+class TextUpdateKey;
+
// Represents a drawable keyboard.
class KeyLayout {
public:
@@ -55,6 +58,9 @@
// Sets the callback to call whenever delete is pressed.
void SetDeleteCallback(base::Callback<void()> on_delete_callback);
+ void SetSuggestTextCallback(
+ base::Callback<void(const std::string&)> on_suggest_text_callback);
+
// Sets the dimensions the keyboard will draw itself into.
void SetKeyArea(const gfx::RectF& key_area);
@@ -68,6 +74,8 @@
// Indicate to the keyboard that a touch up has occurred at the given Point.
void OnTouchUp(const gfx::PointF& touch_up);
+ void SetPredictor(Predictor* predictor);
+
private:
// initializes the *_layout_ vectors.
void InitLayouts();
@@ -85,6 +93,8 @@
// A TextKey callback that calls on_delete_callback_.
void OnKeyDelete(const TextKey& key);
+ void OnSuggestKeyEmitText(const TextUpdateKey& key);
+
// A TextKey callback that switches the layout_ and key_map_ to upper case.
void OnKeySwitchToUpperCase(const TextKey& key);
@@ -96,6 +106,7 @@
base::Callback<void(const std::string&)> on_text_callback_;
base::Callback<void()> on_delete_callback_;
+ base::Callback<void(const std::string&)> on_suggest_text_callback_;
gfx::RectF key_area_;
std::vector<std::vector<float>>* layout_;
std::vector<std::vector<float>> letters_layout_;
@@ -104,6 +115,7 @@
std::vector<std::vector<Key*>> lower_case_key_map_;
std::vector<std::vector<Key*>> upper_case_key_map_;
std::vector<std::vector<Key*>> symbols_key_map_;
+ Predictor* predictor_;
base::WeakPtrFactory<KeyLayout> weak_factory_;
DISALLOW_COPY_AND_ASSIGN(KeyLayout);
diff --git a/services/keyboard_native/predictor.cc b/services/keyboard_native/predictor.cc
new file mode 100644
index 0000000..2465c44
--- /dev/null
+++ b/services/keyboard_native/predictor.cc
@@ -0,0 +1,129 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <iterator>
+#include <sstream>
+#include <string>
+
+#include "base/bind.h"
+#include "services/keyboard_native/predictor.h"
+#include "services/keyboard_native/text_update_key.h"
+
+namespace keyboard {
+
+Predictor::Predictor(mojo::Shell* shell) {
+  // Connect to the prediction application through |shell| and bind
+  // |prediction_service_impl_| to its PredictionService. The key list needs
+  // no setup here: |suggestion_keys_| is default-constructed empty.
+  mojo::ServiceProviderPtr provider;
+  shell->ConnectToApplication("mojo:prediction_service",
+                              mojo::GetProxy(&provider), nullptr);
+  mojo::ConnectToService(provider.get(), &prediction_service_impl_);
+}
+
+Predictor::~Predictor() {
+}
+
+void Predictor::SetSuggestionKeys(
+    std::vector<KeyLayout::Key*> suggestion_keys) {
+  // Replaces the suggestion strip keys with |suggestion_keys| (one row of
+  // TextUpdateKeys, e.g. after a layout switch). Text currently shown on the
+  // old keys is carried over position by position so that the visible
+  // suggestions survive the switch; new keys without an old counterpart keep
+  // whatever text they already hold.
+  //
+  // NOTE(review): every element is assumed to be a TextUpdateKey; the
+  // static_cast below is unchecked.
+  const size_t old_size = suggestion_keys_.size();
+  const size_t new_size = suggestion_keys.size();
+  // Equivalent to std::min, without depending on <algorithm> being pulled in
+  // transitively.
+  const size_t shared_size = old_size < new_size ? old_size : new_size;
+  for (size_t i = 0; i < shared_size; i++) {
+    // ChangeText() takes its argument by value, so this stays safe even when
+    // the old and the new key are the same object.
+    static_cast<TextUpdateKey*>(suggestion_keys[i])
+        ->ChangeText(suggestion_keys_[i]->ToText());
+  }
+  suggestion_keys_.swap(suggestion_keys);
+}
+
+void Predictor::SetUpdateCallback(base::Callback<void()> on_update_callback) {
+ on_update_callback_ = on_update_callback;
+}
+
+void Predictor::StoreCurWord(std::string new_word) {
+  if (new_word != " ") {
+    current_word_ += new_word;  // Still inside a word: extend it.
+    GetSuggestion();
+    return;
+  }
+  previous_words_.push_back(current_word_);  // A space finishes the word.
+  current_word_.clear();
+  ShowEmptySuggestion();
+}
+
+int Predictor::ChooseSuggestedWord(std::string suggested) {
+  // Length of the partially typed word, which the caller has to erase.
+  const int typed_length = static_cast<int>(current_word_.size());
+  // File each whitespace-separated word of |suggested| as a finished word.
+  std::istringstream word_stream(suggested);
+  for (std::string word; word_stream >> word;)
+    previous_words_.push_back(word);
+  current_word_.clear();
+  ShowEmptySuggestion();
+  return typed_length;
+}
+
+void Predictor::DeleteCharInCurWord() {
+  const bool had_typed_chars = !current_word_.empty();
+  if (!had_typed_chars && previous_words_.empty())
+    return;  // Nothing is tracked, so there is nothing to undo.
+  if (had_typed_chars) {
+    current_word_.erase(current_word_.size() - 1);  // Drop the last char.
+  } else {
+    current_word_ = previous_words_.back();  // Reopen the last stored word.
+    previous_words_.pop_back();
+  }
+  if (!current_word_.empty())
+    GetSuggestion();
+  else if (had_typed_chars)
+    ShowEmptySuggestion();  // This delete removed the word's last char.
+}
+
+void Predictor::ShowEmptySuggestion() {
+  // Blank every suggestion key, then signal through |on_update_callback_|.
+  for (KeyLayout::Key* key : suggestion_keys_)
+    static_cast<TextUpdateKey*>(key)->ChangeText("");
+  on_update_callback_.Run();
+}
+
+void Predictor::GetSuggestion() {
+  // Only the word in progress is sent; |previous_words| stays empty because
+  // bigram context is not used yet.
+  prediction::PredictionInfoPtr request = prediction::PredictionInfo::New();
+  request->previous_words =
+      mojo::Array<prediction::PrevWordInfoPtr>::New(0).Pass();
+  request->current_word = mojo::String(current_word_);
+  // The service's reply is handed to GetPredictionListAndEnd().
+  prediction_service_impl_->GetPredictionList(
+      request.Pass(),
+      base::Bind(&Predictor::GetPredictionListAndEnd, base::Unretained(this)));
+}
+
+void Predictor::GetPredictionListAndEnd(
+    const mojo::Array<mojo::String>& input_list) {
+  // Suggestion key i displays prediction i. Keys for which |input_list| has
+  // no entry are blanked, and predictions beyond the number of keys are
+  // ignored.
+  const size_t prediction_count = input_list.size();
+  for (size_t i = 0; i < suggestion_keys_.size(); i++) {
+    const std::string label =
+        i < prediction_count ? std::string(input_list[i].data()) : std::string();
+    static_cast<TextUpdateKey*>(suggestion_keys_[i])->ChangeText(label);
+  }
+  on_update_callback_.Run();
+}
+
+} // namespace keyboard
\ No newline at end of file
diff --git a/services/keyboard_native/predictor.h b/services/keyboard_native/predictor.h
new file mode 100644
index 0000000..51b84e0
--- /dev/null
+++ b/services/keyboard_native/predictor.h
@@ -0,0 +1,51 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SERVICES_KEYBOARD_NATIVE_PREDICTOR_H_
+#define SERVICES_KEYBOARD_NATIVE_PREDICTOR_H_
+
+#include <vector>
+
+#include "base/callback.h"
+#include "mojo/public/cpp/application/connect.h"
+#include "mojo/public/interfaces/application/shell.mojom.h"
+#include "mojo/services/prediction/public/interfaces/prediction.mojom.h"
+#include "services/keyboard_native/key_layout.h"
+
+namespace keyboard {
+
+class Predictor {
+ public:
+  explicit Predictor(mojo::Shell* shell);
+  ~Predictor();
+  // Replaces the keys that display suggestions.
+  void SetSuggestionKeys(std::vector<KeyLayout::Key*> suggestion_keys);
+  // Sets the callback run whenever the suggestion keys' text was updated.
+  void SetUpdateCallback(base::Callback<void()> on_update_callback);
+  // Handles newly typed text; a single space ends the current word.
+  void StoreCurWord(std::string new_word);
+  // Accepts |suggested|; returns the length of the word it replaces.
+  int ChooseSuggestedWord(std::string suggested);
+  // Handles a backspace in the tracked words.
+  void DeleteCharInCurWord();
+
+ private:
+  void ShowEmptySuggestion();  // Blanks every suggestion key.
+  // Requests predictions for |current_word_| from the prediction service.
+  void GetSuggestion();
+  // Receives the predictions and puts them on the suggestion keys.
+  void GetPredictionListAndEnd(const mojo::Array<mojo::String>& input_list);
+
+  prediction::PredictionServicePtr prediction_service_impl_;
+  std::vector<KeyLayout::Key*> suggestion_keys_;
+  std::string current_word_;
+  std::vector<std::string> previous_words_;
+  base::Callback<void()> on_update_callback_;
+
+  DISALLOW_COPY_AND_ASSIGN(Predictor);
+};
+
+} // namespace keyboard
+
+#endif // SERVICES_KEYBOARD_NATIVE_PREDICTOR_H_
\ No newline at end of file
diff --git a/services/keyboard_native/text_update_key.cc b/services/keyboard_native/text_update_key.cc
new file mode 100644
index 0000000..d39e2fd
--- /dev/null
+++ b/services/keyboard_native/text_update_key.cc
@@ -0,0 +1,77 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+
+#include "services/keyboard_native/text_update_key.h"
+#include "third_party/skia/include/core/SkCanvas.h"
+#include "third_party/skia/include/core/SkPaint.h"
+#include "third_party/skia/include/core/SkScalar.h"
+#include "ui/gfx/geometry/rect_f.h"
+
+namespace keyboard {
+
+TextUpdateKey::TextUpdateKey(
+ std::string text,
+ base::Callback<void(const TextUpdateKey&)> touch_up_callback)
+ : text_(text), touch_up_callback_(touch_up_callback) {
+}
+
+TextUpdateKey::~TextUpdateKey() {
+}
+
+// Key implementation.
+// Draws |text_| into |rect|. A label wider than 80% of |rect| is drawn with a
+// 0.6 horizontal text scale; if it is still too wide at that scale, leading
+// characters are cut off and replaced by at most three dots.
+void TextUpdateKey::Draw(SkCanvas* canvas,
+                         const SkPaint& paint,
+                         const gfx::RectF& rect) {
+  const char* label = text_.c_str();
+  const size_t label_length = strlen(label);
+  std::string text_to_fit = text_;
+  // |paint| is const, so scale changes are made on a private copy.
+  SkPaint paint_copy = paint;
+  SkRect bounds;
+  paint_copy.measureText(static_cast<const void*>(label), label_length,
+                         &bounds);
+  if (bounds.width() > rect.width() * 0.8) {
+    // Too wide as is: switch to the condensed scale and measure again. The
+    // scale is left set on |paint_copy| so the draw call below uses it.
+    paint_copy.setTextScaleX(SkFloatToScalar(0.6));
+    paint_copy.measureText(static_cast<const void*>(label), label_length,
+                           &bounds);
+    if (bounds.width() > rect.width() * 0.8) {
+      // The share of the width that overflows, applied to the byte count,
+      // estimates how many leading characters have to go.
+      const int dot_count =
+          SkScalarTruncToInt((SkScalarToFloat(bounds.width()) -
+                              SkScalarToFloat(rect.width()) * 0.8) /
+                             SkScalarToFloat(bounds.width()) * label_length) +
+          1;
+      const int dot_count_in_text = dot_count < 3 ? dot_count : 3;
+      text_to_fit = std::string(dot_count_in_text, '.') +
+                    text_to_fit.substr(dot_count);
+    }
+  }
+
+  const float text_baseline_offset = rect.height() / 5.0f;
+  canvas->drawText(text_to_fit.c_str(), strlen(text_to_fit.c_str()),
+                   rect.x() + (rect.width() / 2.0f),
+                   rect.y() + rect.height() - text_baseline_offset, paint_copy);
+}
+
+const char* TextUpdateKey::ToText() const {
+  // The pointer is only valid until |text_| is next modified (ChangeText()).
+  return text_.c_str();
+}
+
+void TextUpdateKey::OnTouchUp() {
+ touch_up_callback_.Run(*this);
+}
+
+void TextUpdateKey::ChangeText(std::string new_text) {
+ text_ = new_text;
+}
+} // namespace keyboard
\ No newline at end of file
diff --git a/services/keyboard_native/text_update_key.h b/services/keyboard_native/text_update_key.h
new file mode 100644
index 0000000..9644146
--- /dev/null
+++ b/services/keyboard_native/text_update_key.h
@@ -0,0 +1,48 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SERVICES_KEYBOARD_NATIVE_TEXT_UPDATE_KEY_H_
+#define SERVICES_KEYBOARD_NATIVE_TEXT_UPDATE_KEY_H_
+
+#include <string>
+
+#include "base/callback.h"
+#include "services/keyboard_native/key_layout.h"
+
+class SkCanvas;
+class SkPaint;
+
+namespace gfx {
+class RectF;
+}
+
+namespace keyboard {
+class TextUpdateKey : public KeyLayout::Key {
+ public:
+ TextUpdateKey(std::string text,
+ base::Callback<void(const TextUpdateKey&)> touch_up_callback);
+
+ ~TextUpdateKey() override;
+
+ // Key implementation.
+ void Draw(SkCanvas* canvas,
+ const SkPaint& paint,
+ const gfx::RectF& rect) override;
+
+ const char* ToText() const override;
+
+ void OnTouchUp() override;
+ // Replaces the text that Draw() paints and ToText() returns.
+ void ChangeText(std::string new_text);
+
+ private:
+ std::string text_;
+ base::Callback<void(const TextUpdateKey&)> touch_up_callback_;
+
+ DISALLOW_COPY_AND_ASSIGN(TextUpdateKey);
+};
+
+} // namespace keyboard
+
+#endif // SERVICES_KEYBOARD_NATIVE_TEXT_UPDATE_KEY_H_
\ No newline at end of file
diff --git a/services/keyboard_native/view_observer_delegate.cc b/services/keyboard_native/view_observer_delegate.cc
index 99b677f..733443e 100644
--- a/services/keyboard_native/view_observer_delegate.cc
+++ b/services/keyboard_native/view_observer_delegate.cc
@@ -11,9 +11,11 @@
#include "mojo/gpu/gl_texture.h"
#include "mojo/public/cpp/application/application_impl.h"
#include "mojo/public/cpp/application/connect.h"
+#include "mojo/services/prediction/public/interfaces/prediction.mojom.h"
#include "mojo/skia/ganesh_surface.h"
#include "services/keyboard_native/clip_animation.h"
#include "services/keyboard_native/keyboard_service_impl.h"
+#include "services/keyboard_native/predictor.h"
#include "skia/ext/refptr.h"
#include "third_party/skia/include/core/SkCanvas.h"
#include "third_party/skia/include/core/SkTypeface.h"
@@ -36,6 +38,7 @@
ViewObserverDelegate::ViewObserverDelegate()
: keyboard_service_impl_(nullptr),
+ predictor_(nullptr),
view_(nullptr),
id_namespace_(0u),
surface_id_(1u),
@@ -49,6 +52,8 @@
base::Bind(&ViewObserverDelegate::OnText, weak_factory_.GetWeakPtr()));
key_layout_.SetDeleteCallback(
base::Bind(&ViewObserverDelegate::OnDelete, weak_factory_.GetWeakPtr()));
+ key_layout_.SetSuggestTextCallback(base::Bind(
+ &ViewObserverDelegate::OnSuggestText, weak_factory_.GetWeakPtr()));
submit_frame_callback_ = base::Bind(&ViewObserverDelegate::OnFrameComplete,
weak_factory_.GetWeakPtr());
}
@@ -92,6 +97,10 @@
surface_->CreateSurface(surface_id_);
surface_->GetIdNamespace(base::Bind(&ViewObserverDelegate::SetIdNamespace,
base::Unretained(this)));
+ predictor_ = new Predictor(shell);
+ predictor_->SetUpdateCallback(base::Bind(
+ &ViewObserverDelegate::OnUpdateSuggestion, weak_factory_.GetWeakPtr()));
+ key_layout_.SetPredictor(predictor_);
IssueDraw();
}
@@ -103,6 +112,15 @@
keyboard_service_impl_->OnDelete();
}
+void ViewObserverDelegate::OnSuggestText(const std::string& text) {
+  // A chosen suggestion is forwarded together with a trailing space.
+  keyboard_service_impl_->OnKey((text + " ").c_str());
+}
+
+void ViewObserverDelegate::OnUpdateSuggestion() {
+ IssueDraw();
+}
+
void ViewObserverDelegate::UpdateState(int32 pointer_id,
int action,
const gfx::PointF& touch_point) {
diff --git a/services/keyboard_native/view_observer_delegate.h b/services/keyboard_native/view_observer_delegate.h
index 33c313c..9e9fa65 100644
--- a/services/keyboard_native/view_observer_delegate.h
+++ b/services/keyboard_native/view_observer_delegate.h
@@ -19,6 +19,7 @@
namespace keyboard {
class KeyboardServiceImpl;
+class Predictor;
struct PointerState {
KeyLayout::Key* last_key;
@@ -39,6 +40,8 @@
void OnFrameComplete();
void OnText(const std::string& text);
void OnDelete();
+ void OnSuggestText(const std::string& text);
+ void OnUpdateSuggestion();
void DrawState();
void DrawKeysToCanvas(const gfx::RectF& key_area, SkCanvas* canvas);
void DrawAnimations(SkCanvas* canvas, const base::TimeTicks& current_ticks);
@@ -58,6 +61,7 @@
void OnViewInputEvent(mojo::View* view, const mojo::EventPtr& event) override;
KeyboardServiceImpl* keyboard_service_impl_;
+ Predictor* predictor_;
mojo::View* view_;
uint32_t id_namespace_;
uint32_t surface_id_;
diff --git a/services/prediction/BUILD.gn b/services/prediction/BUILD.gn
index 9d8e531..647ad4c 100644
--- a/services/prediction/BUILD.gn
+++ b/services/prediction/BUILD.gn
@@ -3,18 +3,31 @@
# found in the LICENSE file.
import("//mojo/public/mojo_application.gni")
+import("//mojo/tools/embed/rules.gni")
mojo_native_application("prediction") {
output_name = "prediction_service"
sources = [
+ "dictionary_service.cc",
+ "dictionary_service.h",
+ "input_info.cc",
+ "input_info.h",
+ "key_set.h",
"prediction_service_impl.cc",
"prediction_service_impl.h",
+ "proximity_info_factory.cc",
+ "proximity_info_factory.h",
+ "touch_position_correction.cc",
+ "touch_position_correction.h",
]
deps = [
+ ":dict_file",
+ "//base",
"//mojo/application",
"//mojo/services/prediction/public/interfaces",
+ "//third_party/android_prediction:suggest",
]
}
@@ -36,3 +49,9 @@
data_deps = [ ":prediction($default_toolchain)" ]
}
+
+embed_file("dict_file") {
+ source = "res/main_en.dict"
+ namespace = "prediction"
+ variable = "kDictFile"
+}
diff --git a/services/prediction/dictionary_service.cc b/services/prediction/dictionary_service.cc
new file mode 100644
index 0000000..2e0600d
--- /dev/null
+++ b/services/prediction/dictionary_service.cc
@@ -0,0 +1,199 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <algorithm>
+#include <deque>
+#include <fstream>
+#include <string>
+
+#include "base/base_paths.h"
+#include "base/files/file_path.h"
+#include "base/path_service.h"
+#include "base/strings/string16.h"
+#include "base/strings/utf_string_conversions.h"
+#include "mojo/services/prediction/public/interfaces/prediction.mojom.h"
+#include "mojo/tools/embed/data.h"
+#include "services/prediction/dictionary_service.h"
+#include "services/prediction/input_info.h"
+#include "services/prediction/kDictFile.h"
+#include "services/prediction/key_set.h"
+#include "services/prediction/proximity_info_factory.h"
+#include "third_party/android_prediction/suggest/core/dictionary/dictionary.h"
+#include "third_party/android_prediction/suggest/core/result/suggestion_results.h"
+#include "third_party/android_prediction/suggest/core/session/dic_traverse_session.h"
+#include "third_party/android_prediction/suggest/core/session/prev_words_info.h"
+#include "third_party/android_prediction/suggest/core/suggest_options.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
+
+namespace prediction {
+
+DictionaryService::DictionaryService() : max_suggestion_size_(50) {
+}
+
+DictionaryService::~DictionaryService() {
+}
+
+// Writes the embedded dictionary to |path| unless a same-sized copy exists.
+void DictionaryService::CreatDictFromEmbeddedDataIfNotExist(
+    const std::string path) {
+  if (std::ifstream(path.c_str(), std::ios::binary | std::ios::ate).tellg() ==
+      std::streampos(prediction::kDictFile.size))
+    return;  // A truncated or outdated file falls through and is rewritten.
+  std::ofstream dic_file(path.c_str(), std::ios::binary | std::ios::trunc);
+  dic_file.write(prediction::kDictFile.data, prediction::kDictFile.size);
+}
+
+// Opens the dictionary file at |path|; returns nullptr when no structure policy
+// can be created for it. The caller owns the returned Dictionary.
+latinime::Dictionary* const DictionaryService::OpenDictionary(
+    const std::string path,
+    const int start_offset,
+    const int size,
+    const bool is_updatable) {
+  typedef latinime::DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+      PolicyPtr;
+  PolicyPtr policy(latinime::DictionaryStructureWithBufferPolicyFactory::
+                       newPolicyForExistingDictFile(path.c_str(), start_offset,
+                                                    size, is_updatable));
+  if (policy)
+    return new latinime::Dictionary(std::move(policy));
+
+  // No structure policy could be created for the file.
+  return nullptr;
+}
+
+// Returns suggestions for |prediction_info|->current_word, or next-word
+// predictions from the previous words when it contains no ASCII letters.
+// Returns an empty array when the default dictionary cannot be set up.
+mojo::Array<mojo::String> DictionaryService::GetDictionarySuggestion(
+    PredictionInfoPtr prediction_info,
+    latinime::ProximityInfo* proximity_info) {
+  mojo::Array<mojo::String> suggestion_words =
+      mojo::Array<mojo::String>::New(0);
+
+  // dictionary: written to the temp dir and opened on first use only
+  if (!default_dictionary_) {
+    base::FilePath dir_temp;
+    if (!PathService::Get(base::DIR_TEMP, &dir_temp))
+      return suggestion_words.Pass();
+    const std::string path = dir_temp.value() + "/main_en.dict";
+    CreatDictFromEmbeddedDataIfNotExist(path);
+    default_dictionary_ = scoped_ptr<latinime::Dictionary>(
+        OpenDictionary(path, 0, prediction::kDictFile.size, false));
+    if (!default_dictionary_)
+      return suggestion_words.Pass();
+  }
+
+  // dic_traverse_session
+  if (!default_session_) {
+    default_session_ = scoped_ptr<latinime::DicTraverseSession>(
+        reinterpret_cast<latinime::DicTraverseSession*>(
+            latinime::DicTraverseSession::getSessionInstance(
+                "en", prediction::kDictFile.size)));
+    latinime::PrevWordsInfo empty_prev_words;
+    default_session_->init(default_dictionary_.get(), &empty_prev_words, 0);
+  }
+
+  // current word; InputInfo keeps only its ASCII letters
+  int input_size = std::min(
+      static_cast<int>(prediction_info->current_word.size()), MAX_WORD_LENGTH);
+  InputInfo input_info(prediction_info->current_word, input_size);
+  input_size = input_info.GetRealSize();
+
+  // previous words
+  latinime::PrevWordsInfo prev_words_info =
+      ProcessPrevWord(prediction_info->previous_words);
+
+  // suggestion options
+  // is_gesture, use_full_edit_distance,
+  // block_offensive_words, space_aware gesture_enabled
+  int options[] = {0, 0, 0, 0};
+  latinime::SuggestOptions suggest_options(options, arraysize(options));
+
+  latinime::SuggestionResults suggestion_results(max_suggestion_size_);
+  if (input_size > 0) {
+    default_dictionary_->getSuggestions(
+        proximity_info, default_session_.get(), input_info.GetXCoordinates(),
+        input_info.GetYCoordinates(), input_info.GetTimes(),
+        input_info.GetPointerIds(), input_info.GetCodepoints(), input_size,
+        &prev_words_info, &suggest_options, -1.0f, &suggestion_results);
+  } else {
+    default_dictionary_->getPredictions(&prev_words_info, &suggestion_results);
+  }
+
+  // process suggestion results
+  std::deque<std::string> suggestion_words_reverse;
+  unsigned char cur_beginning = 0;  // unsigned: <cctype> rejects negatives
+  std::string lo_cur;
+  std::string up_cur;
+  if (input_size > 0) {
+    cur_beginning = prediction_info->current_word[0];
+    std::string cur_rest =
+        std::string(prediction_info->current_word.data())
+            .substr(1, prediction_info->current_word.size() - 1);
+    lo_cur = std::string(1, static_cast<char>(tolower(cur_beginning)));
+    lo_cur += cur_rest;
+    up_cur = std::string(1, static_cast<char>(toupper(cur_beginning)));
+    up_cur += cur_rest;
+  }
+  while (!suggestion_results.mSuggestedWords.empty()) {
+    const latinime::SuggestedWord& suggested_word =
+        suggestion_results.mSuggestedWords.top();
+    base::string16 word;
+    for (int i = 0; i < suggested_word.getCodePointCount(); i++) {
+      base::char16 code_point = suggested_word.getCodePoint()[i];
+      word.push_back(code_point);
+    }
+    std::string word_string = base::UTF16ToUTF8(word);
+    if (word_string.compare(lo_cur) != 0 && word_string.compare(up_cur) != 0) {
+      if (input_size > 0 && isupper(cur_beginning) && !word_string.empty()) {
+        word_string[0] = static_cast<char>(
+            toupper(static_cast<unsigned char>(word_string[0])));
+      }
+      suggestion_words_reverse.push_front(word_string);
+    }
+    suggestion_results.mSuggestedWords.pop();
+  }
+
+  // copy out, keeping only the first occurrence of each word (removes dups)
+  for (size_t i = 0; i < suggestion_words_reverse.size(); i++) {
+    const std::string& candidate = suggestion_words_reverse[i];
+    if (std::find(suggestion_words_reverse.begin(),
+                  suggestion_words_reverse.begin() + i,
+                  candidate) == suggestion_words_reverse.begin() + i) {
+      suggestion_words.push_back(mojo::String(candidate));
+    }
+  }
+
+  return suggestion_words.Pass();
+}
+
+// Modified from Android JniDataUtils::constructPrevWordsInfo. Bytes are read
+// as unsigned char so values >= 0x80 never become negative code points.
+latinime::PrevWordsInfo DictionaryService::ProcessPrevWord(
+    mojo::Array<PrevWordInfoPtr>& prev_words) {
+  int prev_word_codepoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
+  int prev_word_codepoint_count[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+  bool are_beginning_of_sentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+  const int prevwords_count = static_cast<int>(std::min(
+      prev_words.size(), static_cast<size_t>(MAX_PREV_WORD_COUNT_FOR_N_GRAM)));
+  for (int i = 0; i < prevwords_count; ++i) {
+    prev_word_codepoint_count[i] = 0;
+    are_beginning_of_sentence[i] = false;
+    const int prev_word_size = static_cast<int>(prev_words[i]->word.size());
+    if (prev_word_size > MAX_WORD_LENGTH)
+      continue;  // Over-long words are passed on as empty entries.
+    for (int j = 0; j < prev_word_size; j++) {
+      prev_word_codepoints[i][j] =
+          static_cast<unsigned char>(prev_words[i]->word[j]);
+    }
+    prev_word_codepoint_count[i] = prev_word_size;
+    are_beginning_of_sentence[i] = prev_words[i]->is_beginning_of_sentence;
+  }
+  return latinime::PrevWordsInfo(
+      prev_word_codepoints, prev_word_codepoint_count,
+      are_beginning_of_sentence, prevwords_count);
+}
+
+} // namespace prediction
\ No newline at end of file
diff --git a/services/prediction/dictionary_service.h b/services/prediction/dictionary_service.h
new file mode 100644
index 0000000..ba174db
--- /dev/null
+++ b/services/prediction/dictionary_service.h
@@ -0,0 +1,51 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SERVICES_PREDICTION_DICTIONARY_SERVICE_H_
+#define SERVICES_PREDICTION_DICTIONARY_SERVICE_H_
+
+#include "base/macros.h"
+#include "base/memory/scoped_ptr.h"
+#include "mojo/services/prediction/public/interfaces/prediction.mojom.h"
+#include "services/prediction/proximity_info_factory.h"
+
+namespace latinime {
+
+class Dictionary;
+class DicTraverseSession;
+class PrevWordsInfo;
+} // namespace latinime
+
+namespace prediction {
+
+class DictionaryService {
+ public:
+ DictionaryService();
+ ~DictionaryService();
+
+ mojo::Array<mojo::String> GetDictionarySuggestion(
+ PredictionInfoPtr prediction_info,
+ latinime::ProximityInfo* proximity_info);
+
+ private:
+ void CreatDictFromEmbeddedDataIfNotExist(const std::string path);
+
+ latinime::Dictionary* const OpenDictionary(const std::string path,
+ const int start_offset,
+ const int size,
+ const bool is_updatable);
+
+ latinime::PrevWordsInfo ProcessPrevWord(
+ mojo::Array<PrevWordInfoPtr>& prev_words);
+
+ int max_suggestion_size_;  // Capacity given to latinime::SuggestionResults.
+ scoped_ptr<latinime::Dictionary> default_dictionary_;  // Set on first use.
+ scoped_ptr<latinime::DicTraverseSession> default_session_;  // Set on first use.
+
+ DISALLOW_COPY_AND_ASSIGN(DictionaryService);
+};
+
+} // namespace prediction
+
+#endif // SERVICES_PREDICTION_DICTIONARY_SERVICE_H_
diff --git a/services/prediction/input_info.cc b/services/prediction/input_info.cc
new file mode 100644
index 0000000..061c0c8
--- /dev/null
+++ b/services/prediction/input_info.cc
@@ -0,0 +1,86 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <new>
+
+#include "services/prediction/input_info.h"
+#include "services/prediction/key_set.h"
+#include "third_party/android_prediction/defines.h"
+
+namespace prediction {
+
+// Sizes the per-letter arrays to the ASCII letters in |input|, then fills them.
+InputInfo::InputInfo(mojo::String& input, int input_size) {
+  int letter_count = 0;
+  for (int pos = 0; pos < input_size; ++pos) {
+    const int c = (int)input[pos];
+    const bool is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+    letter_count += is_letter ? 1 : 0;
+  }
+  real_size_ = letter_count;
+  codepoints_ = new int[real_size_];
+  x_coordinates_ = new int[real_size_];
+  y_coordinates_ = new int[real_size_];
+  pointer_ids_ = new int[real_size_];
+  times_ = new int[real_size_];
+
+  ProcessInput(input, input_size);
+}
+
+InputInfo::~InputInfo() {
+ delete[] codepoints_;
+ delete[] x_coordinates_;
+ delete[] y_coordinates_;
+ delete[] pointer_ids_;
+ delete[] times_;
+}
+
+int* InputInfo::GetCodepoints() {
+ return codepoints_;
+}
+
+int* InputInfo::GetXCoordinates() {
+ return x_coordinates_;
+}
+
+int* InputInfo::GetYCoordinates() {
+ return y_coordinates_;
+}
+
+int* InputInfo::GetPointerIds() {
+ return pointer_ids_;
+}
+
+int* InputInfo::GetTimes() {
+ return times_;
+}
+
+int InputInfo::GetRealSize() {
+ return real_size_;
+}
+
+// Records the code point and key-center position of each ASCII letter.
+void InputInfo::ProcessInput(mojo::String& input, int input_size) {
+  int out = 0;
+  for (int pos = 0; pos < input_size; ++pos) {
+    const int c = (int)input[pos];
+    if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
+      continue;
+    codepoints_[out] = c;
+    pointer_ids_[out] = 0;
+    times_[out] = 0;
+    // Keys are looked up by the lower-case code of the letter.
+    for (int k = 0; k < keyset::key_count; ++k) {
+      const Key& key = keyset::key_set[k];
+      if (key.kcode != tolower(c))
+        continue;
+      x_coordinates_[out] = key.kx + key.kwidth / 2;
+      y_coordinates_[out] = key.ky + key.kheight / 2;
+      break;
+    }
+    ++out;
+  }
+}
+
+} // namespace prediction
\ No newline at end of file
diff --git a/services/prediction/input_info.h b/services/prediction/input_info.h
new file mode 100644
index 0000000..5f7caa4
--- /dev/null
+++ b/services/prediction/input_info.h
@@ -0,0 +1,38 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SERVICES_PREDICTION_INPUT_INFO_H_
+#define SERVICES_PREDICTION_INPUT_INFO_H_
+
+#include "mojo/services/prediction/public/interfaces/prediction.mojom.h"
+
+namespace prediction {
+
+class InputInfo {
+ public:
+ InputInfo(mojo::String& input, int input_size);
+ ~InputInfo();  // Releases the five arrays below with delete[].
+
+ int* GetCodepoints();
+ int* GetXCoordinates();
+ int* GetYCoordinates();
+ int* GetPointerIds();
+ int* GetTimes();
+ int GetRealSize();  // Number of ASCII letters kept from the input.
+
+ private:
+ void ProcessInput(mojo::String& input, int input_size);
+
+ int real_size_;
+ int* codepoints_;  // The five arrays hold real_size_ entries each and are
+ int* x_coordinates_;  // owned (new[] in the ctor, delete[] in the dtor).
+ int* y_coordinates_;  // NOTE(review): the class is still copyable, so a
+ int* pointer_ids_;  // copy would delete[] them twice - consider
+ int* times_;  // DISALLOW_COPY_AND_ASSIGN(InputInfo).
+
+}; // class InputInfo
+
+} // namespace prediction
+
+#endif // SERVICES_PREDICTION_INPUT_INFO_H_
diff --git a/services/prediction/key_set.h b/services/prediction/key_set.h
new file mode 100644
index 0000000..8b72b22
--- /dev/null
+++ b/services/prediction/key_set.h
@@ -0,0 +1,92 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SERVICES_PREDICTION_KEY_SET_H_
+#define SERVICES_PREDICTION_KEY_SET_H_
+
+#include "mojo/services/prediction/public/interfaces/prediction.mojom.h"
+
+// qwerty keyboard key sets
+
+namespace prediction {
+
+// NOTE: This struct has been modified from the Android Open
+// Source Project. Specifically from the following file:
+// https://android.googlesource.com/platform/packages/inputmethods/LatinIME/+/
+// android-5.1.1_r8/java/src/com/android/inputmethod/keyboard/Key.java
+struct Key {
+  int kcode;
+  // Width of the key, not including the gap
+  int kwidth;
+  // Height of the key, not including the gap
+  int kheight;
+  // X coordinate of the key in the keyboard layout
+  int kx;
+  // Y coordinate of the key in the keyboard layout
+  int ky;
+  // Hit bounding box of the key
+  int khit_box_left;
+  int khit_box_top;
+  int khit_box_right;
+  int khit_box_bottom;
+
+  // Zero-initializes every field so a default-built key never holds garbage.
+  Key()
+      : kcode(0), kwidth(0), kheight(0), kx(0), ky(0),
+        khit_box_left(0), khit_box_top(0), khit_box_right(0),
+        khit_box_bottom(0) {}
+
+  // |x|, |y|, |width| and |height| describe the key cell including its gaps;
+  // the visible key is shrunk by the gaps, the hit box is taken from the cell.
+  Key(const int code, const int x, const int y, const int width,
+      const int height, const int horizontal_gap, const int vertical_gap)
+      : kcode(code),
+        kwidth(width - horizontal_gap),
+        kheight(height - vertical_gap),
+        kx(x + horizontal_gap / 2),
+        ky(y),
+        khit_box_left(x),
+        khit_box_top(y),
+        khit_box_right(x + width + 1),
+        khit_box_bottom(y + height) {}
+};
+
+namespace keyset {
+
+const Key A(97, 43, 58, 29, 58, 4, 9);
+const Key B(98, 188, 116, 29, 58, 4, 9);
+const Key C(99, 130, 116, 29, 58, 4, 9);
+const Key D(100, 101, 58, 29, 58, 4, 9);
+const Key E(101, 87, 0, 29, 58, 4, 9);
+const Key F(102, 130, 58, 29, 58, 4, 9);
+const Key G(103, 159, 58, 29, 58, 4, 9);
+const Key H(104, 188, 58, 29, 58, 4, 9);
+const Key I(105, 232, 0, 29, 58, 4, 9);
+const Key J(106, 217, 58, 29, 58, 4, 9);
+const Key K(107, 246, 58, 29, 58, 4, 9);
+const Key L(108, 275, 58, 29, 58, 4, 9);
+const Key M(109, 246, 116, 29, 58, 4, 9);
+const Key N(110, 217, 116, 29, 58, 4, 9);
+const Key O(111, 261, 0, 29, 58, 4, 9);
+const Key P(112, 290, 0, 29, 58, 4, 9);
+const Key Q(113, 29, 0, 29, 58, 4, 9);
+const Key R(114, 116, 0, 29, 58, 4, 9);
+const Key S(115, 72, 58, 29, 58, 4, 9);
+const Key T(116, 145, 0, 29, 58, 4, 9);
+const Key U(117, 203, 0, 29, 58, 4, 9);
+const Key V(118, 159, 116, 29, 58, 4, 9);
+const Key W(119, 58, 0, 29, 58, 4, 9);
+const Key X(120, 101, 116, 29, 58, 4, 9);
+const Key Y(121, 174, 0, 29, 58, 4, 9);
+const Key Z(122, 72, 116, 29, 58, 4, 9);
+
+const Key key_set[] = {Q, W, E, R, T, Y, U, I, O, P, A, S, D,
+ F, G, H, J, K, L, Z, X, C, V, B, N, M};
+
+const int key_count = 26;
+
+} // namespace keyset
+} // namespace prediction
+
+#endif // SERVICES_PREDICTION_KEY_SET_H_
diff --git a/services/prediction/prediction_apptests.cc b/services/prediction/prediction_apptests.cc
index effaebb..611c54c 100644
--- a/services/prediction/prediction_apptests.cc
+++ b/services/prediction/prediction_apptests.cc
@@ -30,21 +30,11 @@
&prediction_);
}
- void SetSettingsClient(bool correction,
- bool offensive,
- bool space_aware_gesture) {
- SettingsPtr settings = Settings::New();
- settings->correction_enabled = correction;
- settings->block_potentially_offensive = offensive;
- settings->space_aware_gesture_enabled = space_aware_gesture;
- prediction_->SetSettings(settings.Pass());
- }
-
std::vector<std::string> GetPredictionListClient(
- const mojo::Array<mojo::String>& prev_words,
+ mojo::Array<PrevWordInfoPtr>& prev_words,
const mojo::String& cur_word) {
PredictionInfoPtr prediction_info = PredictionInfo::New();
- prediction_info->previous_words = prev_words.Clone().Pass();
+ prediction_info->previous_words = prev_words.Pass();
prediction_info->current_word = cur_word;
std::vector<std::string> prediction_list;
@@ -61,12 +51,76 @@
DISALLOW_COPY_AND_ASSIGN(PredictionApptest);
};
-TEST_F(PredictionApptest, PredictCat) {
- SetSettingsClient(true, true, true);
- mojo::Array<mojo::String> prev_words;
- prev_words.push_back("dog");
- std::string prediction_cat = GetPredictionListClient(prev_words, "d")[0];
- EXPECT_EQ(prediction_cat, "cat");
+TEST_F(PredictionApptest, CurrentSpellcheck) {
+ mojo::Array<PrevWordInfoPtr> prev_words =
+ mojo::Array<PrevWordInfoPtr>::New(0);
+ std::string prediction = GetPredictionListClient(prev_words, "tgis")[0];
+ EXPECT_EQ(prediction, "this");
+
+ mojo::Array<PrevWordInfoPtr> prev_words1 =
+ mojo::Array<PrevWordInfoPtr>::New(0);
+ std::string prediction1 = GetPredictionListClient(prev_words1, "aplle")[0];
+ EXPECT_EQ(prediction1, "Apple");
+}
+
+TEST_F(PredictionApptest, CurrentSuggest) {
+ mojo::Array<PrevWordInfoPtr> prev_words =
+ mojo::Array<PrevWordInfoPtr>::New(0);
+ std::string prediction = GetPredictionListClient(prev_words, "peac")[0];
+ EXPECT_EQ(prediction, "peace");
+
+ mojo::Array<PrevWordInfoPtr> prev_words1 =
+ mojo::Array<PrevWordInfoPtr>::New(0);
+ std::string prediction1 = GetPredictionListClient(prev_words1, "fil")[0];
+ EXPECT_EQ(prediction1, "film");
+
+ mojo::Array<PrevWordInfoPtr> prev_words2 =
+ mojo::Array<PrevWordInfoPtr>::New(0);
+ std::string prediction2 = GetPredictionListClient(prev_words2, "entert")[0];
+ EXPECT_EQ(prediction2, "entertainment");
+}
+
+TEST_F(PredictionApptest, CurrentSuggestCont) {
+ mojo::Array<PrevWordInfoPtr> prev_words =
+ mojo::Array<PrevWordInfoPtr>::New(0);
+ std::string prediction = GetPredictionListClient(prev_words, "a")[0];
+ EXPECT_EQ(prediction, "and");
+
+ mojo::Array<PrevWordInfoPtr> prev_words1 =
+ mojo::Array<PrevWordInfoPtr>::New(0);
+ std::string prediction1 = GetPredictionListClient(prev_words1, "ab")[0];
+ EXPECT_EQ(prediction1, "an");
+}
+
+TEST_F(PredictionApptest, CurrentSuggestUp) {
+ mojo::Array<PrevWordInfoPtr> prev_words =
+ mojo::Array<PrevWordInfoPtr>::New(0);
+ std::string prediction = GetPredictionListClient(prev_words, "Beau")[0];
+ EXPECT_EQ(prediction, "Beat");
+
+ mojo::Array<PrevWordInfoPtr> prev_words1 =
+ mojo::Array<PrevWordInfoPtr>::New(0);
+ std::string prediction1 = GetPredictionListClient(prev_words1, "THis")[0];
+ EXPECT_EQ(prediction1, "This");
+}
+
+TEST_F(PredictionApptest, CurrentNoSuggest) {
+ mojo::Array<PrevWordInfoPtr> prev_words =
+ mojo::Array<PrevWordInfoPtr>::New(0);
+ std::string prediction = GetPredictionListClient(
+ prev_words,
+ "hjlahflgfagfdafaffgruhgadfhjklghadflkghjalkdfjkldfhrshrtshtsrhkra")[0];
+ EXPECT_EQ(prediction, "homage offered a faded rugged should had dough");
+}
+
+TEST_F(PredictionApptest, EmptyCurrent) {
+ PrevWordInfoPtr prev_word = PrevWordInfo::New();
+ prev_word->word = "This";
+ prev_word->is_beginning_of_sentence = true;
+ mojo::Array<PrevWordInfoPtr> prev_words =
+ mojo::Array<PrevWordInfoPtr>::New(0);
+ prev_words.push_back(prev_word.Pass());
+ EXPECT_EQ((int)GetPredictionListClient(prev_words, "").size(), 0);
}
} // namespace prediction
diff --git a/services/prediction/prediction_service_impl.cc b/services/prediction/prediction_service_impl.cc
index cd3079a..ccc1596 100644
--- a/services/prediction/prediction_service_impl.cc
+++ b/services/prediction/prediction_service_impl.cc
@@ -7,6 +7,7 @@
#include "mojo/public/cpp/application/application_connection.h"
#include "mojo/public/cpp/application/application_delegate.h"
#include "mojo/public/cpp/bindings/strong_binding.h"
+#include "services/prediction/dictionary_service.h"
#include "services/prediction/prediction_service_impl.h"
namespace prediction {
@@ -14,28 +15,21 @@
PredictionServiceImpl::PredictionServiceImpl(
mojo::InterfaceRequest<PredictionService> request)
: strong_binding_(this, request.Pass()) {
+ ProximityInfoFactory proximity_info;
+ proximity_settings_ = scoped_ptr<latinime::ProximityInfo>(
+ proximity_info.GetNativeProximityInfo());
}
PredictionServiceImpl::~PredictionServiceImpl() {
}
// PredictionService implementation
-void PredictionServiceImpl::SetSettings(SettingsPtr settings) {
- stored_settings_.correction_enabled = settings->correction_enabled;
- stored_settings_.block_potentially_offensive =
- settings->block_potentially_offensive;
- stored_settings_.space_aware_gesture_enabled =
- settings->space_aware_gesture_enabled;
-}
-
-// only predict "cat" no matter what prediction_info
-// has for now
void PredictionServiceImpl::GetPredictionList(
PredictionInfoPtr prediction_info,
const GetPredictionListCallback& callback) {
- mojo::String cat = "cat";
- mojo::Array<mojo::String> prediction_list;
- prediction_list.push_back(cat);
+ mojo::Array<mojo::String> prediction_list =
+ dictionary_service_.GetDictionarySuggestion(prediction_info.Pass(),
+ proximity_settings_.get());
callback.Run(prediction_list.Pass());
}
diff --git a/services/prediction/prediction_service_impl.h b/services/prediction/prediction_service_impl.h
index b0dbdab..6238f2e 100644
--- a/services/prediction/prediction_service_impl.h
+++ b/services/prediction/prediction_service_impl.h
@@ -5,6 +5,7 @@
#ifndef SERVICES_PREDICTION_PREDICTION_SERVICE_IMPL_H_
#define SERVICES_PREDICTION_PREDICTION_SERVICE_IMPL_H_
+#include "base/memory/scoped_ptr.h"
#include "mojo/services/prediction/public/interfaces/prediction.mojom.h"
namespace prediction {
@@ -16,15 +17,15 @@
~PredictionServiceImpl() override;
// PredictionService implementation
- void SetSettings(SettingsPtr settings) override;
-
void GetPredictionList(PredictionInfoPtr prediction_info,
const GetPredictionListCallback& callback) override;
private:
- Settings stored_settings_;
mojo::StrongBinding<PredictionService> strong_binding_;
+ scoped_ptr<latinime::ProximityInfo> proximity_settings_;
+ DictionaryService dictionary_service_;
+
DISALLOW_COPY_AND_ASSIGN(PredictionServiceImpl);
};
diff --git a/services/prediction/proximity_info_factory.cc b/services/prediction/proximity_info_factory.cc
new file mode 100644
index 0000000..2c501f8
--- /dev/null
+++ b/services/prediction/proximity_info_factory.cc
@@ -0,0 +1,190 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <algorithm>
+#include <cmath>
+#include <new>
+#include <vector>
+
+#include "services/prediction/proximity_info_factory.h"
+#include "services/prediction/touch_position_correction.h"
+
+// NOTE: This class has been translated to C++ and modified from the Android
+// Open Source Project. Specifically from some functions of the following file:
+// https://android.googlesource.com/platform/packages/inputmethods/LatinIME/+/
+// android-5.1.1_r8/java/src/com/android/inputmethod/keyboard/ProximityInfo.java
+
+namespace prediction {
+
+const float ProximityInfoFactory::SEARCH_DISTANCE = 1.2f;
+
+const float ProximityInfoFactory::DEFAULT_TOUCH_POSITION_CORRECTION_RADIUS =
+ 0.15f;
+
+// Hardcoded qwerty keyboard proximity settings
+ProximityInfoFactory::ProximityInfoFactory() {
+ plocale_ = "en";
+ pgrid_width_ = 32;
+ pgrid_height_ = 16;
+ pgrid_size_ = pgrid_width_ * pgrid_height_;
+ pcell_width_ = (348 + pgrid_width_ - 1) / pgrid_width_;  // ceil(348 / 32)
+ pcell_height_ = (174 + pgrid_height_ - 1) / pgrid_height_;  // ceil(174 / 16)
+ pkeyboard_min_width_ = 348;
+ pkeyboard_height_ = 174;
+ pmost_common_key_height_ = 29;  // NOTE(review): keys in key_set.h are built
+ pmost_common_key_width_ = 58;  // with width 29 / height 58 - swapped? Confirm.
+}
+
+ProximityInfoFactory::~ProximityInfoFactory() {
+}
+
+// Builds the native latinime::ProximityInfo for the hardcoded qwerty layout in
+// keyset::key_set. The keyboard is split into pgrid_width_ x pgrid_height_
+// cells; for each cell, the codes of the keys whose edge lies within
+// SEARCH_DISTANCE * pmost_common_key_width_ of the cell center are recorded,
+// followed by per-key geometry and touch-correction sweet-spot data.
+// Returns a heap-allocated object; the caller takes ownership.
+latinime::ProximityInfo* ProximityInfoFactory::GetNativeProximityInfo() {
+  // Marker for unused proximity slots; the Android Java original pre-fills the
+  // array with Constants.NOT_A_CODE (-1).
+  const int kNotACode = -1;
+  const int threshold =
+      static_cast<int>(pmost_common_key_width_ * SEARCH_DISTANCE);
+  const int threshold_squared = threshold * threshold;
+  const int last_pixel_x_coordinate = pgrid_width_ * pcell_width_ - 1;
+  const int last_pixel_y_coordinate = pgrid_height_ * pcell_height_ - 1;
+
+  // Heap-backed and fully initialized: slots that never receive a neighbor keep
+  // kNotACode instead of handing uninitialized stack memory to the engine.
+  std::vector<int> neighbor_count_per_cell(pgrid_size_, 0);
+  std::vector<int> proximity_chars_array(
+      pgrid_size_ * MAX_PROXIMITY_CHARS_SIZE, kNotACode);
+
+  const int half_cell_width = pcell_width_ / 2;
+  const int half_cell_height = pcell_height_ / 2;
+  for (int i = 0; i < keyset::key_count; i++) {
+    const Key& key = keyset::key_set[i];
+
+    // Range of cell centers that may lie within |threshold| of the key.
+    const int key_x = key.kx;
+    const int key_y = key.ky;
+    const int top_pixel_within_threshold = key_y - threshold;
+    const int y_delta_to_grid = top_pixel_within_threshold % pcell_height_;
+    const int y_middle_of_top_cell =
+        top_pixel_within_threshold - y_delta_to_grid + half_cell_height;
+    const int y_start =
+        std::max(half_cell_height,
+                 y_middle_of_top_cell +
+                     (y_delta_to_grid <= half_cell_height ? 0 : pcell_height_));
+    const int y_end =
+        std::min(last_pixel_y_coordinate, key_y + key.kheight + threshold);
+
+    const int left_pixel_within_threshold = key_x - threshold;
+    const int x_delta_to_grid = left_pixel_within_threshold % pcell_width_;
+    const int x_middle_of_left_cell =
+        left_pixel_within_threshold - x_delta_to_grid + half_cell_width;
+    const int x_start =
+        std::max(half_cell_width,
+                 x_middle_of_left_cell +
+                     (x_delta_to_grid <= half_cell_width ? 0 : pcell_width_));
+    const int x_end =
+        std::min(last_pixel_x_coordinate, key_x + key.kwidth + threshold);
+
+    // Visit the center of every cell inside the key's search rectangle.
+    int base_index_of_current_row =
+        (y_start / pcell_height_) * pgrid_width_ + (x_start / pcell_width_);
+    for (int center_y = y_start; center_y <= y_end; center_y += pcell_height_) {
+      int index = base_index_of_current_row;
+      for (int center_x = x_start; center_x <= x_end;
+           center_x += pcell_width_) {
+        // Each cell owns MAX_PROXIMITY_CHARS_SIZE slots; neighbors beyond that
+        // are dropped so they cannot spill into the next cell or past the end.
+        if (SquaredDistanceToEdge(center_x, center_y, key) <
+                threshold_squared &&
+            neighbor_count_per_cell[index] < MAX_PROXIMITY_CHARS_SIZE) {
+          proximity_chars_array[index * MAX_PROXIMITY_CHARS_SIZE +
+                                neighbor_count_per_cell[index]] = key.kcode;
+          ++neighbor_count_per_cell[index];
+        }
+        ++index;
+      }
+      base_index_of_current_row += pgrid_width_;
+    }
+  }
+
+  // Parallel per-key arrays in keyset::key_set order, as consumed by the
+  // latinime::ProximityInfo constructor below.
+  int key_x_coordinates[keyset::key_count];
+  int key_y_coordinates[keyset::key_count];
+  int key_widths[keyset::key_count];
+  int key_heights[keyset::key_count];
+  int key_char_codes[keyset::key_count];
+  float sweet_spot_center_xs[keyset::key_count];
+  float sweet_spot_center_ys[keyset::key_count];
+  float sweet_spot_radii[keyset::key_count];
+
+  for (int key_index = 0; key_index < keyset::key_count; key_index++) {
+    const Key& key = keyset::key_set[key_index];
+    key_x_coordinates[key_index] = key.kx;
+    key_y_coordinates[key_index] = key.ky;
+    key_widths[key_index] = key.kwidth;
+    key_heights[key_index] = key.kheight;
+    key_char_codes[key_index] = key.kcode;
+  }
+
+  // Sweet spots default to the hit-box center with a default radius; rows that
+  // have correction data shift the center and replace the radius.
+  TouchPositionCorrection touch_position_correction;
+  if (touch_position_correction.IsValid()) {
+    const int rows = touch_position_correction.GetRows();
+    const float default_radius =
+        DEFAULT_TOUCH_POSITION_CORRECTION_RADIUS *
+        (float)std::hypot(pmost_common_key_width_, pmost_common_key_height_);
+    for (int key_index = 0; key_index < keyset::key_count; key_index++) {
+      const Key& key = keyset::key_set[key_index];
+      sweet_spot_center_xs[key_index] =
+          (key.khit_box_left + key.khit_box_right) * 0.5f;
+      sweet_spot_center_ys[key_index] =
+          (key.khit_box_top + key.khit_box_bottom) * 0.5f;
+      sweet_spot_radii[key_index] = default_radius;
+      const int row = key.khit_box_top / pmost_common_key_height_;
+      if (row < rows) {
+        const int hit_box_width = key.khit_box_right - key.khit_box_left;
+        const int hit_box_height = key.khit_box_bottom - key.khit_box_top;
+        const float hit_box_diagonal =
+            (float)std::hypot(hit_box_width, hit_box_height);
+        sweet_spot_center_xs[key_index] +=
+            touch_position_correction.GetX(row) * hit_box_width;
+        sweet_spot_center_ys[key_index] +=
+            touch_position_correction.GetY(row) * hit_box_height;
+        sweet_spot_radii[key_index] =
+            touch_position_correction.GetRadius(row) * hit_box_diagonal;
+      }
+    }
+  }
+
+  latinime::ProximityInfo* proximity_info = new latinime::ProximityInfo(
+      plocale_, pkeyboard_min_width_, pkeyboard_height_, pgrid_width_,
+      pgrid_height_, pmost_common_key_width_, pmost_common_key_height_,
+      proximity_chars_array.data(), pgrid_size_ * MAX_PROXIMITY_CHARS_SIZE,
+      keyset::key_count, key_x_coordinates, key_y_coordinates, key_widths,
+      key_heights, key_char_codes, sweet_spot_center_xs, sweet_spot_center_ys,
+      sweet_spot_radii);
+
+  return proximity_info;
+}
+
+// Returns the squared distance from (x, y) to the rectangle of |k| given by
+// kx, ky, kwidth and kheight.
+int ProximityInfoFactory::SquaredDistanceToEdge(int x, int y, Key k) {
+  const int x_max = k.kx + k.kwidth;
+  const int y_max = k.ky + k.kheight;
+
+  // Per-axis gap to the rectangle; 0 when the coordinate is not outside it.
+  const int gap_x = x < k.kx ? x - k.kx : (x > x_max ? x - x_max : 0);
+  const int gap_y = y < k.ky ? y - k.ky : (y > y_max ? y - y_max : 0);
+  return gap_x * gap_x + gap_y * gap_y;
+}
+
+} // namespace prediction
\ No newline at end of file
diff --git a/services/prediction/proximity_info_factory.h b/services/prediction/proximity_info_factory.h
new file mode 100644
index 0000000..3ac8186
--- /dev/null
+++ b/services/prediction/proximity_info_factory.h
@@ -0,0 +1,47 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SERVICES_PREDICTION_PROXIMITY_INFO_FACTORY_H_
+#define SERVICES_PREDICTION_PROXIMITY_INFO_FACTORY_H_
+
+#include "mojo/services/prediction/public/interfaces/prediction.mojom.h"
+#include "services/prediction/key_set.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info.h"
+
+// NOTE: This class has been translated to C++ and modified from the Android
+// Open Source Project. Specifically from some functions of the following file:
+// https://android.googlesource.com/platform/packages/inputmethods/LatinIME/+/
+// android-5.1.1_r8/java/src/com/android/inputmethod/keyboard/ProximityInfo.java
+
+namespace prediction {
+
+class ProximityInfoFactory {
+ public:
+ ProximityInfoFactory();
+ ~ProximityInfoFactory();
+
+ latinime::ProximityInfo* GetNativeProximityInfo();  // Returns a new object.
+
+ private:
+ // Number of key widths from current touch point to search for nearest keys.
+ static const float SEARCH_DISTANCE;
+ static const float DEFAULT_TOUCH_POSITION_CORRECTION_RADIUS;
+
+ int SquaredDistanceToEdge(int x, int y, Key k);
+
+ int pgrid_width_;
+ int pgrid_height_;
+ int pgrid_size_;  // pgrid_width_ * pgrid_height_
+ int pcell_width_;
+ int pcell_height_;
+ int pkeyboard_min_width_;
+ int pkeyboard_height_;
+ int pmost_common_key_width_;
+ int pmost_common_key_height_;
+ std::string plocale_;  // NOTE(review): <string> is not included directly here.
+}; // class ProximityInfoFactory
+
+} // namespace prediction
+
+#endif // SERVICES_PREDICTION_PROXIMITY_INFO_FACTORY_H_
diff --git a/services/prediction/res/main_en.dict.sha1 b/services/prediction/res/main_en.dict.sha1
new file mode 100644
index 0000000..59c8e11
--- /dev/null
+++ b/services/prediction/res/main_en.dict.sha1
@@ -0,0 +1 @@
+40966defb8c63340382e264022d9fea668ac5494
\ No newline at end of file
diff --git a/services/prediction/touch_position_correction.cc b/services/prediction/touch_position_correction.cc
new file mode 100644
index 0000000..e2eea2b
--- /dev/null
+++ b/services/prediction/touch_position_correction.cc
@@ -0,0 +1,72 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "services/prediction/touch_position_correction.h"
+
+// NOTE: This class was translated to C++ and adapted from the Android Open
+// Source Project, specifically from parts of the following file:
+// https://android.googlesource.com/platform/packages/inputmethods/LatinIME/+/
+// android-5.1.1_r8/java/src/com/android/inputmethod/keyboard/internal/
+// TouchPositionCorrection.java
+
+namespace prediction {
+
+const int TouchPositionCorrection::TOUCH_POSITION_CORRECTION_RECORD_SIZE = 3;
+
+TouchPositionCorrection::TouchPositionCorrection() {
+ // Values currently used by Android TouchPositionCorrection, stored as flattened (x, y, radius) records.
+ std::string data[9] = {"0.0038756",  // xs_[0]
+ "-0.0005677",  // ys_[0]
+ "0.1577026",  // radii_[0]
+ "-0.0236678",  // xs_[1]
+ "0.0381731",  // ys_[1]
+ "0.1529972",  // radii_[1]
+ "-0.0086827",  // xs_[2]
+ "0.0880847",  // ys_[2]
+ "0.1522819"};  // radii_[2]
+ const int data_length = 9;
+ if (data_length % TOUCH_POSITION_CORRECTION_RECORD_SIZE != 0) {
+ return;  // NOTE(review): enabled_ stays unset on this path; unreachable while data_length is 9 and the record size is 3.
+ }
+
+ for (int i = 0; i < data_length; ++i) {
+ const int type = i % TOUCH_POSITION_CORRECTION_RECORD_SIZE;  // Position inside the record: 0 = x, 1 = y, 2 = radius.
+ const int index = i / TOUCH_POSITION_CORRECTION_RECORD_SIZE;  // Record number, used as the array index.
+ const float value = std::stof(data[i]);
+ if (type == 0) {
+ xs_[index] = value;
+ } else if (type == 1) {
+ ys_[index] = value;
+ } else {
+ radii_[index] = value;
+ }
+ }
+ enabled_ = data_length > 0;  // Always true for the fixed 9-entry table above.
+}
+
+TouchPositionCorrection::~TouchPositionCorrection() {  // Members are plain values; nothing to release.
+}
+
+bool TouchPositionCorrection::IsValid() {
+ return enabled_;  // Set at the end of the constructor once the tables are filled.
+}
+
+int TouchPositionCorrection::GetRows() {
+ return 3;  // Matches the 3-element xs_/ys_/radii_ arrays (9 values / 3 per record).
+}
+
+float TouchPositionCorrection::GetX(const int row) {
+ // Touch position correction data for X coordinate is obsolete.
+ return 0.0f;  // |row| and xs_ are not consulted.
+}
+
+float TouchPositionCorrection::GetY(const int row) {
+ return ys_[row];  // No bounds check: |row| must be in [0, GetRows()).
+}
+
+float TouchPositionCorrection::GetRadius(const int row) {
+ return radii_[row];  // No bounds check: |row| must be in [0, GetRows()).
+}
+
+} // namespace prediction
\ No newline at end of file
diff --git a/services/prediction/touch_position_correction.h b/services/prediction/touch_position_correction.h
new file mode 100644
index 0000000..1ccf880
--- /dev/null
+++ b/services/prediction/touch_position_correction.h
@@ -0,0 +1,41 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SERVICES_PREDICTION_TOUCH_POSITION_CORRECTION_H_
+#define SERVICES_PREDICTION_TOUCH_POSITION_CORRECTION_H_
+
+#include "mojo/services/prediction/public/interfaces/prediction.mojom.h"
+
+// NOTE: This class was translated to C++ and adapted from the Android Open
+// Source Project, specifically from parts of the following file:
+// https://android.googlesource.com/platform/packages/inputmethods/LatinIME/+/
+// android-5.1.1_r8/java/src/com/android/inputmethod/keyboard/internal/
+// TouchPositionCorrection.java
+
+namespace prediction {
+
+class TouchPositionCorrection {  // Fixed table of per-row touch correction values (x, y, radius).
+ public:
+ TouchPositionCorrection();  // Fills the tables from values built into the .cc file.
+ ~TouchPositionCorrection();
+
+ bool IsValid();  // Returns enabled_.
+
+ int GetRows();  // Always 3.
+ float GetX(const int row);  // Always 0.0f; X correction data is obsolete.
+ float GetY(const int row);  // ys_[row]; |row| is not range-checked.
+ float GetRadius(const int row);  // radii_[row]; |row| is not range-checked.
+
+ private:
+ static const int TOUCH_POSITION_CORRECTION_RECORD_SIZE;  // Defined as 3 in the .cc file: x, y, radius.
+
+ bool enabled_;  // No in-class initializer; assigned by the constructor.
+ float xs_[3];  // Populated by the constructor but not returned by GetX().
+ float ys_[3];
+ float radii_[3];
+}; // class TouchPositionCorrection
+
+} // namespace prediction
+
+#endif // SERVICES_PREDICTION_TOUCH_POSITION_CORRECTION_H_
diff --git a/third_party/android_prediction/BUILD.gn b/third_party/android_prediction/BUILD.gn
new file mode 100644
index 0000000..f827551
--- /dev/null
+++ b/third_party/android_prediction/BUILD.gn
@@ -0,0 +1,222 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+source_set("suggest") {  # Suggestion code imported from Android LatinIME; local changes are recorded in CHROMIUM.diff.
+ sources = [
+ "defines.h",
+ "suggest/core/dicnode/dic_node.cpp",
+ "suggest/core/dicnode/dic_node.h",
+ "suggest/core/dicnode/dic_node_pool.h",
+ "suggest/core/dicnode/dic_node_priority_queue.h",
+ "suggest/core/dicnode/dic_node_profiler.h",
+ "suggest/core/dicnode/dic_node_utils.cpp",
+ "suggest/core/dicnode/dic_node_utils.h",
+ "suggest/core/dicnode/dic_node_vector.h",
+ "suggest/core/dicnode/dic_nodes_cache.cpp",
+ "suggest/core/dicnode/dic_nodes_cache.h",
+ "suggest/core/dicnode/internal/dic_node_properties.h",
+ "suggest/core/dicnode/internal/dic_node_state.h",
+ "suggest/core/dicnode/internal/dic_node_state_input.h",
+ "suggest/core/dicnode/internal/dic_node_state_output.h",
+ "suggest/core/dicnode/internal/dic_node_state_scoring.h",
+ "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h",
+ "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h",
+ "suggest/core/dictionary/bloom_filter.h",
+ "suggest/core/dictionary/dictionary.cpp",
+ "suggest/core/dictionary/dictionary.h",
+ "suggest/core/dictionary/dictionary_utils.cpp",
+ "suggest/core/dictionary/dictionary_utils.h",
+ "suggest/core/dictionary/digraph_utils.cpp",
+ "suggest/core/dictionary/digraph_utils.h",
+ "suggest/core/dictionary/error_type_utils.cpp",
+ "suggest/core/dictionary/error_type_utils.h",
+ "suggest/core/dictionary/multi_bigram_map.cpp",
+ "suggest/core/dictionary/multi_bigram_map.h",
+ "suggest/core/dictionary/ngram_listener.h",
+ "suggest/core/dictionary/property/bigram_property.h",
+ "suggest/core/dictionary/property/unigram_property.h",
+ "suggest/core/dictionary/property/word_property.h",
+ "suggest/core/layout/additional_proximity_chars.cpp",
+ "suggest/core/layout/additional_proximity_chars.h",
+ "suggest/core/layout/geometry_utils.h",
+ "suggest/core/layout/normal_distribution.h",
+ "suggest/core/layout/normal_distribution_2d.h",
+ "suggest/core/layout/proximity_info.cpp",
+ "suggest/core/layout/proximity_info.h",
+ "suggest/core/layout/proximity_info_params.cpp",
+ "suggest/core/layout/proximity_info_params.h",
+ "suggest/core/layout/proximity_info_state.cpp",
+ "suggest/core/layout/proximity_info_state.h",
+ "suggest/core/layout/proximity_info_state_utils.cpp",
+ "suggest/core/layout/proximity_info_state_utils.h",
+ "suggest/core/layout/proximity_info_utils.h",
+ "suggest/core/layout/touch_position_correction_utils.h",
+ "suggest/core/policy/dictionary_bigrams_structure_policy.h",
+ "suggest/core/policy/dictionary_header_structure_policy.h",
+ "suggest/core/policy/dictionary_shortcuts_structure_policy.h",
+ "suggest/core/policy/dictionary_structure_with_buffer_policy.h",
+ "suggest/core/policy/scoring.h",
+ "suggest/core/policy/suggest_policy.h",
+ "suggest/core/policy/traversal.h",
+ "suggest/core/policy/weighting.cpp",
+ "suggest/core/policy/weighting.h",
+ "suggest/core/result/suggested_word.h",
+ "suggest/core/result/suggestion_results.cpp",
+ "suggest/core/result/suggestion_results.h",
+ "suggest/core/result/suggestions_output_utils.cpp",
+ "suggest/core/result/suggestions_output_utils.h",
+ "suggest/core/session/dic_traverse_session.cpp",
+ "suggest/core/session/dic_traverse_session.h",
+ "suggest/core/session/prev_words_info.h",
+ "suggest/core/suggest.cpp",
+ "suggest/core/suggest.h",
+ "suggest/core/suggest_interface.h",
+ "suggest/core/suggest_options.h",
+ "suggest/policyimpl/dictionary/header/header_policy.cpp",
+ "suggest/policyimpl/dictionary/header/header_policy.h",
+ "suggest/policyimpl/dictionary/header/header_read_write_utils.cpp",
+ "suggest/policyimpl/dictionary/header/header_read_write_utils.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp",
+ "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h",
+ "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp",
+ "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp",
+ "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp",
+ "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp",
+ "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp",
+ "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp",
+ "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp",
+ "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp",
+ "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h",
+ "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp",
+ "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h",
+ "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h",
+ "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp",
+ "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h",
+ "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h",
+ "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp",
+ "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h",
+ "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp",
+ "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h",
+ "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h",
+ "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h",
+ "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h",
+ "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h",
+ "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h",
+ "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h",
+ "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h",
+ "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h",
+ "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h",
+ "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp",
+ "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h",
+ "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp",
+ "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h",
+ "suggest/policyimpl/dictionary/utils/byte_array_utils.cpp",
+ "suggest/policyimpl/dictionary/utils/byte_array_utils.h",
+ "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp",
+ "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h",
+ "suggest/policyimpl/dictionary/utils/file_utils.cpp",
+ "suggest/policyimpl/dictionary/utils/file_utils.h",
+ "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp",
+ "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h",
+ "suggest/policyimpl/dictionary/utils/format_utils.cpp",
+ "suggest/policyimpl/dictionary/utils/format_utils.h",
+ "suggest/policyimpl/dictionary/utils/historical_info.h",
+ "suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp",
+ "suggest/policyimpl/dictionary/utils/mmapped_buffer.h",
+ "suggest/policyimpl/dictionary/utils/probability_utils.h",
+ "suggest/policyimpl/dictionary/utils/sparse_table.cpp",
+ "suggest/policyimpl/dictionary/utils/sparse_table.h",
+ "suggest/policyimpl/dictionary/utils/trie_map.cpp",
+ "suggest/policyimpl/dictionary/utils/trie_map.h",
+ "suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp",
+ "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h",
+ "suggest/policyimpl/typing/scoring_params.cpp",
+ "suggest/policyimpl/typing/scoring_params.h",
+ "suggest/policyimpl/typing/typing_scoring.cpp",
+ "suggest/policyimpl/typing/typing_scoring.h",
+ "suggest/policyimpl/typing/typing_suggest_policy.cpp",
+ "suggest/policyimpl/typing/typing_suggest_policy.h",
+ "suggest/policyimpl/typing/typing_suggest_policy_factory.h",
+ "suggest/policyimpl/typing/typing_traversal.cpp",
+ "suggest/policyimpl/typing/typing_traversal.h",
+ "suggest/policyimpl/typing/typing_weighting.cpp",
+ "suggest/policyimpl/typing/typing_weighting.h",
+ "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h",
+ "suggest/policyimpl/utils/edit_distance.h",
+ "suggest/policyimpl/utils/edit_distance_policy.h",
+ "utils/autocorrection_threshold_utils.cpp",
+ "utils/autocorrection_threshold_utils.h",
+ "utils/byte_array_view.h",
+ "utils/char_utils.cpp",
+ "utils/char_utils.h",
+ "utils/int_array_view.h",
+ "utils/time_keeper.cpp",
+ "utils/time_keeper.h",
+ ]
+ configs -= [ "//build/config/compiler:chromium_code" ]  # Swap the chromium_code compiler config ...
+ configs += [ "//build/config/compiler:no_chromium_code" ]  # ... for no_chromium_code, since this is third_party code.
+}
diff --git a/third_party/android_prediction/CHROMIUM.diff b/third_party/android_prediction/CHROMIUM.diff
new file mode 100644
index 0000000..4a56701
--- /dev/null
+++ b/third_party/android_prediction/CHROMIUM.diff
@@ -0,0 +1,378 @@
+Only in third_party/android_prediction/: BUILD.gn
+Only in third_party/android_prediction/: CHROMIUM.diff
+diff -ru /usr/local/google/home/riajiang/Downloads/android_prediction/defines.h third_party/android_prediction/defines.h
+--- /usr/local/google/home/riajiang/Downloads/android_prediction/defines.h 2015-08-04 11:08:28.000000000 -0700
++++ third_party/android_prediction/defines.h 2015-08-05 17:18:47.846770065 -0700
+@@ -17,6 +17,8 @@
+ #ifndef LATINIME_DEFINES_H
+ #define LATINIME_DEFINES_H
+
++#include "base/macros.h"
++
+ #ifdef __GNUC__
+ #define AK_FORCE_INLINE __attribute__((always_inline)) __inline__
+ #else // __GNUC__
+@@ -39,8 +41,8 @@
+ // TODO: Use size_t instead of int.
+ // Disclaimer: You will see a compile error if you use this macro against a variable-length array.
+ // Sorry for the inconvenience. It isn't supported.
+-template <typename T, int N>
+-char (&ArraySizeHelper(T (&array)[N]))[N];
++// template <typename T, int N>
++// char (&ArraySizeHelper(T (&array)[N]))[N];
+ #define NELEMS(x) (sizeof(ArraySizeHelper(x)))
+
+ AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize,
+@@ -348,6 +350,7 @@
+ #define DISALLOW_ASSIGNMENT_OPERATOR(TypeName) \
+ void operator=(const TypeName&) = delete
+
++/*
+ #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ DISALLOW_COPY_CONSTRUCTOR(TypeName); \
+ DISALLOW_ASSIGNMENT_OPERATOR(TypeName)
+@@ -355,6 +358,7 @@
+ #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
+ DISALLOW_DEFAULT_CONSTRUCTOR(TypeName); \
+ DISALLOW_COPY_AND_ASSIGN(TypeName)
++*/
+
+ // Used as a return value for character comparison
+ typedef enum {
+Only in third_party/android_prediction/: LICENSE
+Only in third_party/android_prediction/: README.chromium
+diff -ru /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/dictionary/dictionary.cpp third_party/android_prediction/suggest/core/dictionary/dictionary.cpp
+--- /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/dictionary/dictionary.cpp 2015-08-04 11:08:28.000000000 -0700
++++ third_party/android_prediction/suggest/core/dictionary/dictionary.cpp 2015-08-05 17:18:47.574768333 -0700
+@@ -16,31 +16,29 @@
+-#include "utils/log_utils.h"
+
+ const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
+
+-Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
++Dictionary::Dictionary(DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ dictionaryStructureWithBufferPolicy)
+ : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
+ mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
+ mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
+- logDictionaryInfo(env);
+ }
+
+ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
+@@ -191,32 +189,4 @@
+ token, outCodePoints, outCodePointCount);
+ }
+
+-void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
+- int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
+- int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
+- int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
+- const DictionaryHeaderStructurePolicy *const headerPolicy =
+- getDictionaryStructurePolicy()->getHeaderStructurePolicy();
+- headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
+- HEADER_ATTRIBUTE_BUFFER_SIZE);
+- headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
+- HEADER_ATTRIBUTE_BUFFER_SIZE);
+- headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer,
+- HEADER_ATTRIBUTE_BUFFER_SIZE);
+-
+- char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
+- char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
+- char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
+- intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
+- dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
+- intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
+- versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
+- intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
+- dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
+-
+- LogUtils::logToJava(env,
+- "Dictionary info: dictionary = %s ; version = %s ; date = %s",
+- dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
+-}
+-
+ } // namespace latinime
+diff -ru /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/dictionary/dictionary.h third_party/android_prediction/suggest/core/dictionary/dictionary.h
+--- /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/dictionary/dictionary.h 2015-08-04 11:08:28.000000000 -0700
++++ third_party/android_prediction/suggest/core/dictionary/dictionary.h 2015-08-05 17:18:47.574768333 -0700
+@@ -19,13 +19,12 @@
+-#include "jni.h"
+
+@@ -59,7 +58,7 @@
+ static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
+ static const int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000;
+
+- Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
++ Dictionary(DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ dictionaryStructureWithBufferPolicy);
+
+ void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
+@@ -136,8 +135,6 @@
+ mDictionaryStructureWithBufferPolicy;
+ const SuggestInterfacePtr mGestureSuggest;
+ const SuggestInterfacePtr mTypingSuggest;
+-
+- void logDictionaryInfo(JNIEnv *const env) const;
+ };
+ } // namespace latinime
+ #endif // LATINIME_DICTIONARY_H
+Only in /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/dictionary/property: word_property.cpp
+diff -ru /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/dictionary/property/word_property.h third_party/android_prediction/suggest/core/dictionary/property/word_property.h
+--- /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/dictionary/property/word_property.h 2015-08-04 11:08:28.000000000 -0700
++++ third_party/android_prediction/suggest/core/dictionary/property/word_property.h 2015-08-05 17:18:47.578768359 -0700
+@@ -19,10 +19,9 @@
+-#include "jni.h"
+
+@@ -38,10 +37,6 @@
+ const std::vector<BigramProperty> *const bigrams)
+ : mCodePoints(*codePoints), mUnigramProperty(*unigramProperty), mBigrams(*bigrams) {}
+
+- void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
+- jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
+- jobject outShortcutTargets, jobject outShortcutProbabilities) const;
+-
+ const UnigramProperty *getUnigramProperty() const {
+ return &mUnigramProperty;
+ }
+diff -ru /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/layout/proximity_info.cpp third_party/android_prediction/suggest/core/layout/proximity_info.cpp
+--- /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/layout/proximity_info.cpp 2015-08-04 11:08:28.000000000 -0700
++++ third_party/android_prediction/suggest/core/layout/proximity_info.cpp 2015-08-05 17:18:47.578768359 -0700
+@@ -16,46 +16,49 @@
+-#include "jni.h"
+
+ namespace latinime {
+
+-static AK_FORCE_INLINE void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray,
+- jsize len, jint *buffer) {
++static AK_FORCE_INLINE void safeGetOrFillZeroIntArrayRegion(const int *jArray,
++ int len, int *buffer) {
+ if (jArray && buffer) {
+- env->GetIntArrayRegion(jArray, 0, len, buffer);
++ for (int i = 0; i < len; i++) {
++ buffer[i] = jArray[i];
++ }
+ } else if (buffer) {
+ memset(buffer, 0, len * sizeof(buffer[0]));
+ }
+ }
+
+-static AK_FORCE_INLINE void safeGetOrFillZeroFloatArrayRegion(JNIEnv *env, jfloatArray jArray,
+- jsize len, jfloat *buffer) {
++static AK_FORCE_INLINE void safeGetOrFillZeroFloatArrayRegion(const float *jArray,
++ int len, float *buffer) {
+ if (jArray && buffer) {
+- env->GetFloatArrayRegion(jArray, 0, len, buffer);
++ for (int i = 0; i < len; i++) {
++ buffer[i] = jArray[i];
++ }
+ } else if (buffer) {
+ memset(buffer, 0, len * sizeof(buffer[0]));
+ }
+ }
+
+-ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr,
++ProximityInfo::ProximityInfo(const std::string localeJStr,
+ const int keyboardWidth, const int keyboardHeight, const int gridWidth,
+ const int gridHeight, const int mostCommonKeyWidth, const int mostCommonKeyHeight,
+- const jintArray proximityChars, const int keyCount, const jintArray keyXCoordinates,
+- const jintArray keyYCoordinates, const jintArray keyWidths, const jintArray keyHeights,
+- const jintArray keyCharCodes, const jfloatArray sweetSpotCenterXs,
+- const jfloatArray sweetSpotCenterYs, const jfloatArray sweetSpotRadii)
++ int *proximityChars, int proximitySize, const int keyCount, const int *keyXCoordinates,
++ const int *keyYCoordinates, const int *keyWidths, const int *keyHeights,
++ const int *keyCharCodes, const float *sweetSpotCenterXs,
++ const float *sweetSpotCenterYs, const float *sweetSpotRadii)
+ : GRID_WIDTH(gridWidth), GRID_HEIGHT(gridHeight), MOST_COMMON_KEY_WIDTH(mostCommonKeyWidth),
+ MOST_COMMON_KEY_WIDTH_SQUARE(mostCommonKeyWidth * mostCommonKeyWidth),
+ NORMALIZED_SQUARED_MOST_COMMON_KEY_HYPOTENUSE(1.0f +
+@@ -73,7 +76,7 @@
+ /* proximityCharsLength */]),
+ mLowerCodePointToKeyMap() {
+ /* Let's check the input array length here to make sure */
+- const jsize proximityCharsLength = env->GetArrayLength(proximityChars);
++ int proximityCharsLength = proximitySize;
+ if (proximityCharsLength != GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE) {
+ AKLOGE("Invalid proximityCharsLength: %d", proximityCharsLength);
+ ASSERT(false);
+@@ -82,23 +85,25 @@
+ if (DEBUG_PROXIMITY_INFO) {
+ AKLOGI("Create proximity info array %d", proximityCharsLength);
+ }
+- const jsize localeCStrUtf8Length = env->GetStringUTFLength(localeJStr);
++ const int localeCStrUtf8Length = localeJStr.length();
+ if (localeCStrUtf8Length >= MAX_LOCALE_STRING_LENGTH) {
+ AKLOGI("Locale string length too long: length=%d", localeCStrUtf8Length);
+ ASSERT(false);
+ }
+ memset(mLocaleStr, 0, sizeof(mLocaleStr));
+- env->GetStringUTFRegion(localeJStr, 0, env->GetStringLength(localeJStr), mLocaleStr);
+- safeGetOrFillZeroIntArrayRegion(env, proximityChars, proximityCharsLength,
++ for (int i = 0; i < localeCStrUtf8Length; i++) {
++ mLocaleStr[i] = localeJStr[i];
++ }
++ safeGetOrFillZeroIntArrayRegion(proximityChars, proximityCharsLength,
+ mProximityCharsArray);
+- safeGetOrFillZeroIntArrayRegion(env, keyXCoordinates, KEY_COUNT, mKeyXCoordinates);
+- safeGetOrFillZeroIntArrayRegion(env, keyYCoordinates, KEY_COUNT, mKeyYCoordinates);
+- safeGetOrFillZeroIntArrayRegion(env, keyWidths, KEY_COUNT, mKeyWidths);
+- safeGetOrFillZeroIntArrayRegion(env, keyHeights, KEY_COUNT, mKeyHeights);
+- safeGetOrFillZeroIntArrayRegion(env, keyCharCodes, KEY_COUNT, mKeyCodePoints);
+- safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterXs, KEY_COUNT, mSweetSpotCenterXs);
+- safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterYs, KEY_COUNT, mSweetSpotCenterYs);
+- safeGetOrFillZeroFloatArrayRegion(env, sweetSpotRadii, KEY_COUNT, mSweetSpotRadii);
++ safeGetOrFillZeroIntArrayRegion(keyXCoordinates, KEY_COUNT, mKeyXCoordinates);
++ safeGetOrFillZeroIntArrayRegion(keyYCoordinates, KEY_COUNT, mKeyYCoordinates);
++ safeGetOrFillZeroIntArrayRegion(keyWidths, KEY_COUNT, mKeyWidths);
++ safeGetOrFillZeroIntArrayRegion(keyHeights, KEY_COUNT, mKeyHeights);
++ safeGetOrFillZeroIntArrayRegion(keyCharCodes, KEY_COUNT, mKeyCodePoints);
++ safeGetOrFillZeroFloatArrayRegion(sweetSpotCenterXs, KEY_COUNT, mSweetSpotCenterXs);
++ safeGetOrFillZeroFloatArrayRegion(sweetSpotCenterYs, KEY_COUNT, mSweetSpotCenterYs);
++ safeGetOrFillZeroFloatArrayRegion(sweetSpotRadii, KEY_COUNT, mSweetSpotRadii);
+ initializeG();
+ }
+
+diff -ru /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/layout/proximity_info.h third_party/android_prediction/suggest/core/layout/proximity_info.h
+--- /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/layout/proximity_info.h 2015-08-04 11:08:28.000000000 -0700
++++ third_party/android_prediction/suggest/core/layout/proximity_info.h 2015-08-05 17:18:47.578768359 -0700
+@@ -17,23 +17,23 @@
+ #ifndef LATINIME_PROXIMITY_INFO_H
+ #define LATINIME_PROXIMITY_INFO_H
+
++#include <string>
+ #include <unordered_map>
+
+-#include "jni.h"
+
+ namespace latinime {
+
+ class ProximityInfo {
+ public:
+- ProximityInfo(JNIEnv *env, const jstring localeJStr,
++ ProximityInfo(const std::string localeJStr,
+ const int keyboardWidth, const int keyboardHeight, const int gridWidth,
+ const int gridHeight, const int mostCommonKeyWidth, const int mostCommonKeyHeight,
+- const jintArray proximityChars, const int keyCount, const jintArray keyXCoordinates,
+- const jintArray keyYCoordinates, const jintArray keyWidths, const jintArray keyHeights,
+- const jintArray keyCharCodes, const jfloatArray sweetSpotCenterXs,
+- const jfloatArray sweetSpotCenterYs, const jfloatArray sweetSpotRadii);
++ int *proximityChars, int proximitySize, const int keyCount, const int *keyXCoordinates,
++ const int *keyYCoordinates, const int *keyWidths, const int *keyHeights,
++ const int *keyCharCodes, const float *sweetSpotCenterXs,
++ const float *sweetSpotCenterYs, const float *sweetSpotRadii);
+ ~ProximityInfo();
+ bool hasSpaceProximity(const int x, const int y) const;
+ float getNormalizedSquaredDistanceFromCenterFloatG(
+diff -ru /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/result/suggestion_results.cpp third_party/android_prediction/suggest/core/result/suggestion_results.cpp
+--- /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/result/suggestion_results.cpp 2015-08-04 11:08:28.000000000 -0700
++++ third_party/android_prediction/suggest/core/result/suggestion_results.cpp 2015-08-05 17:18:47.582768384 -0700
+@@ -14,39 +14,10 @@
+ * limitations under the License.
+ */
+-#include "utils/jni_data_utils.h"
+
+ namespace latinime {
+
+-void SuggestionResults::outputSuggestions(JNIEnv *env, jintArray outSuggestionCount,
+- jintArray outputCodePointsArray, jintArray outScoresArray, jintArray outSpaceIndicesArray,
+- jintArray outTypesArray, jintArray outAutoCommitFirstWordConfidenceArray,
+- jfloatArray outLanguageWeight) {
+- int outputIndex = 0;
+- while (!mSuggestedWords.empty()) {
+- const SuggestedWord &suggestedWord = mSuggestedWords.top();
+- suggestedWord.getCodePointCount();
+- const int start = outputIndex * MAX_WORD_LENGTH;
+- JniDataUtils::outputCodePoints(env, outputCodePointsArray, start,
+- MAX_WORD_LENGTH /* maxLength */, suggestedWord.getCodePoint(),
+- suggestedWord.getCodePointCount(), true /* needsNullTermination */);
+- JniDataUtils::putIntToArray(env, outScoresArray, outputIndex, suggestedWord.getScore());
+- JniDataUtils::putIntToArray(env, outSpaceIndicesArray, outputIndex,
+- suggestedWord.getIndexToPartialCommit());
+- JniDataUtils::putIntToArray(env, outTypesArray, outputIndex, suggestedWord.getType());
+- if (mSuggestedWords.size() == 1) {
+- JniDataUtils::putIntToArray(env, outAutoCommitFirstWordConfidenceArray, 0 /* index */,
+- suggestedWord.getAutoCommitFirstWordConfidence());
+- }
+- ++outputIndex;
+- mSuggestedWords.pop();
+- }
+- JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, outputIndex);
+- JniDataUtils::putFloatToArray(env, outLanguageWeight, 0 /* index */, mLanguageWeight);
+-}
+-
+ void SuggestionResults::addPrediction(const int *const codePoints, const int codePointCount,
+ const int probability) {
+ if (probability == NOT_A_PROBABILITY) {
+diff -ru /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/result/suggestion_results.h third_party/android_prediction/suggest/core/result/suggestion_results.h
+--- /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/result/suggestion_results.h 2015-08-04 11:08:28.000000000 -0700
++++ third_party/android_prediction/suggest/core/result/suggestion_results.h 2015-08-05 17:18:47.582768384 -0700
+@@ -20,22 +20,17 @@
+-#include "jni.h"
+
+ namespace latinime {
+
+ class SuggestionResults {
+ public:
+ explicit SuggestionResults(const int maxSuggestionCount)
+- : mMaxSuggestionCount(maxSuggestionCount), mLanguageWeight(NOT_A_LANGUAGE_WEIGHT),
+- mSuggestedWords() {}
++ : mSuggestedWords(), mMaxSuggestionCount(maxSuggestionCount),
++ mLanguageWeight(NOT_A_LANGUAGE_WEIGHT) {}
+
+- // Returns suggestion count.
+- void outputSuggestions(JNIEnv *env, jintArray outSuggestionCount, jintArray outCodePointsArray,
+- jintArray outScoresArray, jintArray outSpaceIndicesArray, jintArray outTypesArray,
+- jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray outLanguageWeight);
+ void addPrediction(const int *const codePoints, const int codePointCount, const int score);
+ void addSuggestion(const int *const codePoints, const int codePointCount,
+ const int score, const int type, const int indexToPartialCommit,
+@@ -51,13 +46,14 @@
+ return mSuggestedWords.size();
+ }
+
++ std::priority_queue<
++ SuggestedWord, std::vector<SuggestedWord>, SuggestedWord::Comparator> mSuggestedWords;
++
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SuggestionResults);
+
+ const int mMaxSuggestionCount;
+ float mLanguageWeight;
+- std::priority_queue<
+- SuggestedWord, std::vector<SuggestedWord>, SuggestedWord::Comparator> mSuggestedWords;
+ };
+ } // namespace latinime
+ #endif // LATINIME_SUGGESTION_RESULTS_H
+diff -ru /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/session/dic_traverse_session.h third_party/android_prediction/suggest/core/session/dic_traverse_session.h
+--- /usr/local/google/home/riajiang/Downloads/android_prediction/suggest/core/session/dic_traverse_session.h 2015-08-04 11:08:28.000000000 -0700
++++ third_party/android_prediction/suggest/core/session/dic_traverse_session.h 2015-08-05 17:18:47.582768384 -0700
+@@ -19,11 +19,10 @@
+-#include "jni.h"
+
+@@ -37,11 +36,11 @@
+ public:
+
+ // A factory method for DicTraverseSession
+- static AK_FORCE_INLINE void *getSessionInstance(JNIEnv *env, jstring localeStr,
+- jlong dictSize) {
++ static AK_FORCE_INLINE void *getSessionInstance(std::string localeStr,
++ long dictSize) {
+ // To deal with the trade-off between accuracy and memory space, large cache is used for
+ // dictionaries larger that the threshold
+- return new DicTraverseSession(env, localeStr,
++ return new DicTraverseSession(localeStr,
+ dictSize >= DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION);
+ }
+
+@@ -49,7 +48,7 @@
+ delete traverseSession;
+ }
+
+- AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
++ AK_FORCE_INLINE DicTraverseSession(std::string localeStr, bool usesLargeCache)
+ : mProximityInfo(nullptr), mDictionary(nullptr), mSuggestOptions(nullptr),
+ mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mMaxPointerCount(1),
+ mMultiWordCostMultiplier(1.0f) {
+Only in /usr/local/google/home/riajiang/Downloads/android_prediction/utils: jni_data_utils.cpp
+Only in /usr/local/google/home/riajiang/Downloads/android_prediction/utils: jni_data_utils.h
+Only in /usr/local/google/home/riajiang/Downloads/android_prediction/utils: log_utils.cpp
+Only in /usr/local/google/home/riajiang/Downloads/android_prediction/utils: log_utils.h
\ No newline at end of file
diff --git a/third_party/android_prediction/LICENSE b/third_party/android_prediction/LICENSE
new file mode 100644
index 0000000..7a4a3ea
--- /dev/null
+++ b/third_party/android_prediction/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/third_party/android_prediction/README.chromium b/third_party/android_prediction/README.chromium
new file mode 100644
index 0000000..c67fa8a
--- /dev/null
+++ b/third_party/android_prediction/README.chromium
@@ -0,0 +1,29 @@
+Name: android_prediction
+URL: https://android.googlesource.com/platform/packages/inputmethods/LatinIME/
++/android-5.1.1_r8/native/jni/src/
+Version: 5.1.1
+Date: 2015/07/28
+Revision: cc47075cd72d3649433c7a69ae5c88669a56646d
+License: Apache 2.0
+License File: NOT_SHIPPED
+Security Critical: no
+
+Description:
+Android LatinIME jni native code used to do spellcheck, word suggestion and
+word prediction.
+
+Local Modifications:
+Changed all JNI interface functions to not use JNI interface pointer and
+JNI types. (CHROMIUM.diff)
+
+Changed all includes to be
+defines.h -> third_party/android_prediction/defines.h,
+suggest/* -> third_party/android_prediction/suggest/*,
+utils/* -> third_party/android_prediction/utils/*
+
+Deleted
+src/suggest/core/dictionary/property/word_property.cpp,
+src/utils/jni_data_utils.cpp,
+src/utils/jni_data_utils.h,
+src/utils/log_utils.cpp,
+src/utils/log_utils.h
\ No newline at end of file
diff --git a/third_party/android_prediction/defines.h b/third_party/android_prediction/defines.h
new file mode 100644
index 0000000..3c0605a
--- /dev/null
+++ b/third_party/android_prediction/defines.h
@@ -0,0 +1,406 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DEFINES_H
+#define LATINIME_DEFINES_H
+
+#include "base/macros.h"
+
+#ifdef __GNUC__
+#define AK_FORCE_INLINE __attribute__((always_inline)) __inline__
+#else // __GNUC__
+#define AK_FORCE_INLINE inline
+#endif // __GNUC__
+
+#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+#undef AK_FORCE_INLINE
+#define AK_FORCE_INLINE inline
+#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+
+// Must be equal to Constants.Dictionary.MAX_WORD_LENGTH in Java
+#define MAX_WORD_LENGTH 48
+// Must be equal to BinaryDictionary.MAX_RESULTS in Java
+#define MAX_RESULTS 18
+// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java
+#define MAX_PROXIMITY_CHARS_SIZE 16
+#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
+
+// TODO: Use size_t instead of int.
+// Disclaimer: You will see a compile error if you use this macro against a variable-length array.
+// Sorry for the inconvenience. It isn't supported.
+// template <typename T, int N>
+// char (&ArraySizeHelper(T (&array)[N]))[N];
+#define NELEMS(x) (sizeof(ArraySizeHelper(x)))
+
+// Encodes code points as NUL-terminated UTF-8 (stops at a 0 code point); returns bytes written.
+AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize,
+        char *dest, const int destSize) {
+    if (destSize <= 0) return 0;  // No room for even the terminator; leave dest untouched.
+    // Always terminate with a 0 char, so stop one short of the length to leave room for it.
+    const int destLimit = destSize - 1;
+    int si = 0;
+    int di = 0;
+    while (si < sourceSize && di < destLimit && 0 != source[si]) {
+        const int codePoint = source[si++];
+        if (codePoint < 0) continue;  // Not a code point... skip.
+        if (codePoint <= 0x7F) { // One byte
+            dest[di++] = codePoint;
+        } else if (codePoint <= 0x7FF) { // Two bytes
+            if (di + 1 >= destLimit) break;
+            dest[di++] = 0xC0 + (codePoint >> 6);
+            dest[di++] = 0x80 + (codePoint & 0x3F);
+        } else if (codePoint <= 0xFFFF) { // Three bytes
+            if (di + 2 >= destLimit) break;
+            dest[di++] = 0xE0 + (codePoint >> 12);
+            dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+            dest[di++] = 0x80 + (codePoint & 0x3F);
+        } else if (codePoint <= 0x1FFFFF) { // Four bytes
+            if (di + 3 >= destLimit) break;
+            dest[di++] = 0xF0 + (codePoint >> 18);
+            dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+            dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+            dest[di++] = 0x80 + (codePoint & 0x3F);
+        } else if (codePoint <= 0x3FFFFFF) { // Five bytes
+            if (di + 4 >= destLimit) break;
+            dest[di++] = 0xF8 + (codePoint >> 24);
+            dest[di++] = 0x80 + ((codePoint >> 18) & 0x3F);
+            dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+            dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+            dest[di++] = 0x80 + (codePoint & 0x3F);  // Continuation bytes always carry 0x80.
+        } else { // Six bytes (every remaining non-negative 32-bit value)
+            if (di + 5 >= destLimit) break;
+            dest[di++] = 0xFC + (codePoint >> 30);
+            dest[di++] = 0x80 + ((codePoint >> 24) & 0x3F);
+            dest[di++] = 0x80 + ((codePoint >> 18) & 0x3F);
+            dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+            dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+            dest[di++] = 0x80 + (codePoint & 0x3F);  // Continuation bytes always carry 0x80.
+        }
+    }
+    dest[di] = 0;
+    return di;
+}
+
+#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+#if defined(__ANDROID__)
+#include <android/log.h>
+#endif // defined(__ANDROID__)
+#ifndef LOG_TAG
+#define LOG_TAG "LatinIME: "
+#endif // LOG_TAG
+
+#if defined(HOST_TOOL)
+#include <stdio.h>
+#define AKLOGE(fmt, ...) printf(fmt "\n", ##__VA_ARGS__)
+#define AKLOGI(fmt, ...) printf(fmt "\n", ##__VA_ARGS__)
+#else // defined(HOST_TOOL)
+#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__)
+#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__)
+#endif // defined(HOST_TOOL)
+
+#define DUMP_SUGGESTION(words, frequencies, index, score) \
+ do { dumpWordInfo(words, frequencies, index, score); } while (0)
+#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
+#define INTS_TO_CHARS(input, length, output, outlength) do { \
+ intArrayToCharArray(input, length, output, outlength); } while (0)
+
+// Logs "rank [ word ] (probability)" when the word's UTF-8 form is longer than one byte.
+static inline void dumpWordInfo(const int *word, const int length, const int rank,
+        const int probability) {
+    static char utf8Buf[50];
+    const int byteCount = intArrayToCharArray(word, length, utf8Buf, NELEMS(utf8Buf));
+    if (byteCount <= 1) return;
+    AKLOGI("%2d [ %s ] (%d)", rank, utf8Buf, probability);
+}
+
+// Logs "[ word ]" when the word's UTF-8 form is longer than one byte.
+static AK_FORCE_INLINE void dumpWord(const int *word, const int length) {
+    static char utf8Buf[50];
+    const int byteCount = intArrayToCharArray(word, length, utf8Buf, NELEMS(utf8Buf));
+    if (byteCount <= 1) return;
+    AKLOGI("[ %s ]", utf8Buf);
+}
+
+#ifndef __ANDROID__
+#include <cassert>
+#include <execinfo.h>
+#include <stdlib.h>
+
+#define DO_ASSERT_TEST
+#define ASSERT(success) do { if (!(success)) { showStackTrace(); assert(success);} } while (0)
+#define SHOW_STACK_TRACE do { showStackTrace(); } while (0)
+
+// Logs the current call stack through AKLOGI; a banner line is printed in place of frame 0.
+static inline void showStackTrace() {
+    void *callstack[128];
+    const int frames = backtrace(callstack, 128);
+    char **strs = backtrace_symbols(callstack, frames);
+    // backtrace_symbols() returns NULL on failure; there is nothing to print or free then.
+    if (!strs) return;
+    if (frames > 0) AKLOGI("=== Trace ===");
+    for (int i = 1; i < frames; ++i) {
+        AKLOGI("%s", strs[i]);
+    }
+    free(strs);
+}
+#else // __ANDROID__
+#include <cassert>
+#define DO_ASSERT_TEST
+#define ASSERT(success) assert(success)
+#define SHOW_STACK_TRACE
+#endif // __ANDROID__
+
+#else // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+#define AKLOGE(fmt, ...)
+#define AKLOGI(fmt, ...)
+#define DUMP_SUGGESTION(words, frequencies, index, score)
+#define DUMP_WORD(word, length)
+#undef DO_ASSERT_TEST
+#define ASSERT(success)
+#define SHOW_STACK_TRACE
+#define INTS_TO_CHARS(...)  // No-op; accepts the debug variant's 4 arguments (and old 3-arg calls).
+#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+
+#ifdef FLAG_DO_PROFILE
+// Profiler
+#include <time.h>
+
+#define PROF_BUF_SIZE 100
+static float profile_buf[PROF_BUF_SIZE];
+static float profile_old[PROF_BUF_SIZE];
+static unsigned int profile_counter[PROF_BUF_SIZE];
+
+#define PROF_RESET prof_reset()
+#define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id]
+#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while (0)
+#define PROF_START(prof_buf_id) do { \
+ PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while (0)
+#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while (0)
+#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id])
+#define PROF_CLOCKOUT(prof_buf_id) \
+ AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id]))
+#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while (0)
+
+// Zeroes every profiling slot: accumulated clocks, start stamps and call counters.
+static inline void prof_reset(void) {
+    for (int slot = PROF_BUF_SIZE - 1; slot >= 0; --slot) {
+        profile_buf[slot] = profile_old[slot] = 0;
+        profile_counter[slot] = 0;
+    }
+}
+
+// Logs the total time held in the last slot, then the share, time and calls of each used slot.
+static inline void prof_out(void) {
+    const int totalSlot = PROF_BUF_SIZE - 1;  // Slot that PROF_OPEN starts and PROF_CLOSE ends.
+    if (1 != profile_counter[totalSlot]) {
+        AKLOGI("Error: You must call PROF_OPEN before PROF_CLOSE.");
+    }
+    AKLOGI("Total time is %6.3f ms.",
+            profile_buf[totalSlot] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC));
+    float clockSum = 0.0f;
+    for (int slot = 0; slot < totalSlot; ++slot) clockSum += profile_buf[slot];
+    // Clamp the divisor used for the percentages below to at least 1.
+    if (clockSum < 1.0f) clockSum = 1.0f;
+    for (int slot = 0; slot < totalSlot; ++slot) {
+        if (!(profile_buf[slot] > 0.0f)) continue;
+        AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.",
+                slot, (profile_buf[slot] * 100.0f / clockSum),
+                profile_buf[slot] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC),
+                profile_counter[slot]);
+    }
+}
+
+#else // FLAG_DO_PROFILE
+#define PROF_BUF_SIZE 0
+#define PROF_RESET
+#define PROF_COUNT(prof_buf_id)
+#define PROF_OPEN
+#define PROF_START(prof_buf_id)
+#define PROF_CLOSE
+#define PROF_END(prof_buf_id)
+#define PROF_CLOCK_OUT(prof_buf_id)
+#define PROF_CLOCKOUT(prof_buf_id)
+#define PROF_OUTALL
+
+#endif // FLAG_DO_PROFILE
+
+#ifdef FLAG_DBG
+#define DEBUG_DICT true
+#define DEBUG_DICT_FULL false
+#define DEBUG_EDIT_DISTANCE false
+#define DEBUG_NODE DEBUG_DICT_FULL
+#define DEBUG_TRACE DEBUG_DICT_FULL
+#define DEBUG_PROXIMITY_INFO false
+#define DEBUG_PROXIMITY_CHARS false
+#define DEBUG_CORRECTION false
+#define DEBUG_CORRECTION_FREQ false
+#define DEBUG_SAMPLING_POINTS false
+#define DEBUG_POINTS_PROBABILITY false
+#define DEBUG_DOUBLE_LETTER false
+#define DEBUG_CACHE false
+#define DEBUG_DUMP_ERROR false
+#define DEBUG_EVALUATE_MOST_PROBABLE_STRING false
+
+#ifdef FLAG_FULL_DBG
+#define DEBUG_GEO_FULL true
+#else
+#define DEBUG_GEO_FULL false
+#endif
+
+#else // FLAG_DBG
+
+#define DEBUG_DICT false
+#define DEBUG_DICT_FULL false
+#define DEBUG_EDIT_DISTANCE false
+#define DEBUG_NODE false
+#define DEBUG_TRACE false
+#define DEBUG_PROXIMITY_INFO false
+#define DEBUG_PROXIMITY_CHARS false
+#define DEBUG_CORRECTION false
+#define DEBUG_CORRECTION_FREQ false
+#define DEBUG_SAMPLING_POINTS false
+#define DEBUG_POINTS_PROBABILITY false
+#define DEBUG_DOUBLE_LETTER false
+#define DEBUG_CACHE false
+#define DEBUG_DUMP_ERROR false
+#define DEBUG_EVALUATE_MOST_PROBABLE_STRING false
+
+#define DEBUG_GEO_FULL false
+
+#endif // FLAG_DBG
+
+#ifndef S_INT_MAX
+#define S_INT_MAX 2147483647 // ((1 << 31) - 1)
+#endif
+#ifndef S_INT_MIN
+// The literal constant -2147483648 does not work in C prior to C90, because
+// the compiler tries to fit the positive number into an int and then negates it.
+// GCC warns about this.
+#define S_INT_MIN (-2147483647 - 1) // -(1 << 31)
+#endif
+
+#define M_PI_F 3.14159265f
+#define MAX_PERCENTILE 100
+
+#define NOT_A_CODE_POINT (-1)
+#define NOT_A_DISTANCE (-1)
+#define NOT_A_COORDINATE (-1)
+#define NOT_AN_INDEX (-1)
+#define NOT_A_PROBABILITY (-1)
+#define NOT_A_DICT_POS (S_INT_MIN)
+#define NOT_A_TIMESTAMP (-1)
+#define NOT_A_LANGUAGE_WEIGHT (-1.0f)
+
+// A special value to mean the first word confidence makes no sense in this case,
+// e.g. this is not a multi-word suggestion.
+#define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MIN)
+// How high the confidence needs to be for us to auto-commit. Arbitrary.
+// This needs to be the same as CONFIDENCE_FOR_AUTO_COMMIT in BinaryDictionary.java
+#define CONFIDENCE_FOR_AUTO_COMMIT (1000000)
+// 80% of the full confidence
+#define DISTANCE_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100)
+// 100% of the full confidence
+#define LENGTH_WEIGHT_FOR_AUTO_COMMIT (CONFIDENCE_FOR_AUTO_COMMIT)
+// 80% of the full confidence
+#define SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100)
+
+#define KEYCODE_SPACE ' '
+#define KEYCODE_SINGLE_QUOTE '\''
+#define KEYCODE_HYPHEN_MINUS '-'
+// Code point to indicate beginning-of-sentence. This is not in the code point space of unicode.
+#define CODE_POINT_BEGINNING_OF_SENTENCE 0x110000
+
+#define SUGGEST_INTERFACE_OUTPUT_SCALE 1000000.0f
+#define MAX_PROBABILITY 255
+#define MAX_BIGRAM_ENCODED_PROBABILITY 15
+
+// Max value for length, distance and probability which are used in weighting
+// TODO: Remove
+#define MAX_VALUE_FOR_WEIGHTING 10000000
+
+// The max number of the keys in one keyboard layout
+#define MAX_KEY_COUNT_IN_A_KEYBOARD 64
+
+// TODO: Remove
+#define MAX_POINTER_COUNT 1
+#define MAX_POINTER_COUNT_G 2
+
+// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported.
+#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 1
+
+#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
+ TypeName() = delete
+
+#define DISALLOW_COPY_CONSTRUCTOR(TypeName) \
+ TypeName(const TypeName&) = delete
+
+#define DISALLOW_ASSIGNMENT_OPERATOR(TypeName) \
+ void operator=(const TypeName&) = delete
+
+/*
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ DISALLOW_COPY_CONSTRUCTOR(TypeName); \
+ DISALLOW_ASSIGNMENT_OPERATOR(TypeName)
+
+#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
+ DISALLOW_DEFAULT_CONSTRUCTOR(TypeName); \
+ DISALLOW_COPY_AND_ASSIGN(TypeName)
+*/
+
+// Used as a return value for character comparison
+enum ProximityType {
+    // The same character, possibly differing in case or accent
+    MATCH_CHAR,
+    // A character located on a nearby key of the keyboard
+    PROXIMITY_CHAR,
+    // An additional proximity character; these can differ by language
+    ADDITIONAL_PROXIMITY_CHAR,
+    // A substitution character
+    SUBSTITUTION_CHAR,
+    // An unrelated character
+    UNRELATED_CHAR,
+};
+
+enum DoubleLetterLevel {
+    NOT_A_DOUBLE_LETTER,     // = 0
+    A_DOUBLE_LETTER,         // = 1
+    A_STRONG_DOUBLE_LETTER   // = 2
+};
+
+enum CorrectionType {
+    // Correction that corresponds to MATCH_CHAR
+    CT_MATCH,
+    // Correction that corresponds to PROXIMITY_CHAR
+    CT_PROXIMITY,
+    // Correction that corresponds to ADDITIONAL_PROXIMITY_CHAR
+    CT_ADDITIONAL_PROXIMITY,
+    // Correction that corresponds to SUBSTITUTION_CHAR
+    CT_SUBSTITUTION,
+    // Skips one letter that was omitted
+    CT_OMISSION,
+    // Deletes a letter that was inserted unnecessarily
+    CT_INSERTION,
+    // Swaps the order of the next two touch points
+    CT_TRANSPOSITION,
+    CT_COMPLETION,
+    CT_TERMINAL,
+    CT_TERMINAL_INSERTION,
+    // Creates a new word with space omission
+    CT_NEW_WORD_SPACE_OMISSION,
+    // Creates a new word with space substitution
+    CT_NEW_WORD_SPACE_SUBSTITUTION,
+};
+#endif // LATINIME_DEFINES_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/dic_node.cpp b/third_party/android_prediction/suggest/core/dicnode/dic_node.cpp
new file mode 100644
index 0000000..6c4daa3
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/dic_node.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+
+namespace latinime {
+
+DicNode::DicNode(const DicNode &dicNode)
+ :
+#if DEBUG_DICT
+ mProfiler(dicNode.mProfiler),
+#endif
+ mDicNodeProperties(dicNode.mDicNodeProperties), mDicNodeState(dicNode.mDicNodeState),
+ mIsCachedForNextSuggestion(dicNode.mIsCachedForNextSuggestion) {
+ /* empty */
+}
+
+DicNode &DicNode::operator=(const DicNode &dicNode) {
+#if DEBUG_DICT
+ mProfiler = dicNode.mProfiler;
+#endif
+ mDicNodeProperties = dicNode.mDicNodeProperties;
+ mDicNodeState = dicNode.mDicNodeState;
+ mIsCachedForNextSuggestion = dicNode.mIsCachedForNextSuggestion;
+ return *this;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/dicnode/dic_node.h b/third_party/android_prediction/suggest/core/dicnode/dic_node.h
new file mode 100644
index 0000000..f42de7f
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/dic_node.h
@@ -0,0 +1,519 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_H
+#define LATINIME_DIC_NODE_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_profiler.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_utils.h"
+#include "third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state.h"
+#include "third_party/android_prediction/suggest/core/dicnode/internal/dic_node_properties.h"
+#include "third_party/android_prediction/suggest/core/dictionary/digraph_utils.h"
+#include "third_party/android_prediction/suggest/core/dictionary/error_type_utils.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_state.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+#if DEBUG_DICT
+#define LOGI_SHOW_ADD_COST_PROP \
+ do { \
+ char charBuf[50]; \
+ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
+ AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
+ __FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
+ getInputIndex(0), getNormalizedCompoundDistance(), charBuf); \
+ } while (0)
+#define DUMP_WORD_AND_SCORE(header) \
+ do { \
+ char charBuf[50]; \
+ INTS_TO_CHARS(getOutputWordBuf(), \
+ getNodeCodePointCount() \
+ + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength(), \
+ charBuf, NELEMS(charBuf)); \
+ AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %d, %5f,", header, \
+ getSpatialDistanceForScoring(), \
+ mDicNodeState.mDicNodeStateScoring.getLanguageDistance(), \
+ getNormalizedCompoundDistance(), getRawLength(), charBuf, \
+ getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \
+ } while (0)
+#else
+#define LOGI_SHOW_ADD_COST_PROP
+#define DUMP_WORD_AND_SCORE(header)
+#endif
+
+namespace latinime {
+
+// This struct is purely a bucket to return values. No instances of this struct should be kept.
+struct DicNode_InputStateG {
+ DicNode_InputStateG()
+ : mNeedsToUpdateInputStateG(false), mPointerId(0), mInputIndex(0),
+ mPrevCodePoint(0), mTerminalDiffCost(0.0f), mRawLength(0.0f),
+ mDoubleLetterLevel(NOT_A_DOUBLE_LETTER) {}
+
+ bool mNeedsToUpdateInputStateG;
+ int mPointerId;
+ int16_t mInputIndex;
+ int mPrevCodePoint;
+ float mTerminalDiffCost;
+ float mRawLength;
+ DoubleLetterLevel mDoubleLetterLevel;
+};
+
+class DicNode {
+ // Caveat: We define Weighting as a friend class of DicNode to let Weighting change
+ // the distance of DicNode.
+ // Caution!!! In general, we avoid using the "friend" access modifier.
+ // This is an exception to explicitly hide DicNode::addCost() from all classes but Weighting.
+ friend class Weighting;
+
+ public:
+#if DEBUG_DICT
+ DicNodeProfiler mProfiler;
+#endif
+
+ AK_FORCE_INLINE DicNode()
+ :
+#if DEBUG_DICT
+ mProfiler(),
+#endif
+ mDicNodeProperties(), mDicNodeState(), mIsCachedForNextSuggestion(false) {}
+
+ DicNode(const DicNode &dicNode);
+ DicNode &operator=(const DicNode &dicNode);
+ ~DicNode() {}
+
+ // Init for copy
+ void initByCopy(const DicNode *const dicNode) {
+ mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
+ mDicNodeProperties.initByCopy(&dicNode->mDicNodeProperties);
+ mDicNodeState.initByCopy(&dicNode->mDicNodeState);
+ PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
+ }
+
+ // Init for root with prevWordsPtNodePos which is used for n-gram
+ void initAsRoot(const int rootPtNodeArrayPos, const int *const prevWordsPtNodePos) {
+ mIsCachedForNextSuggestion = false;
+ mDicNodeProperties.init(rootPtNodeArrayPos, prevWordsPtNodePos);
+ mDicNodeState.init();
+ PROF_NODE_RESET(mProfiler);
+ }
+
+ // Init for root with previous word
+ void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
+ mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
+ int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos();
+ for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) {
+ newPrevWordsPtNodePos[i] = dicNode->getPrevWordsTerminalPtNodePos()[i - 1];
+ }
+ mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos);
+ mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
+ dicNode->mDicNodeProperties.getDepth());
+ PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
+ }
+
+ void initAsPassingChild(const DicNode *parentDicNode) {
+ mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
+ const int codePoint =
+ parentDicNode->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(
+ parentDicNode->getNodeCodePointCount());
+ mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, codePoint);
+ mDicNodeState.initByCopy(&parentDicNode->mDicNodeState);
+ PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
+ }
+
+ void initAsChild(const DicNode *const dicNode, const int ptNodePos,
+ const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
+ uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
+ mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
+ const uint16_t newLeavingDepth = static_cast<uint16_t>(
+ dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
+ mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
+ probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
+ newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordsTerminalPtNodePos());
+ mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
+ mergedNodeCodePoints);
+ PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
+ }
+
+ bool isRoot() const {
+ return getNodeCodePointCount() == 0;
+ }
+
+ bool hasChildren() const {
+ return mDicNodeProperties.hasChildren();
+ }
+
+ bool isLeavingNode() const {
+ ASSERT(getNodeCodePointCount() <= mDicNodeProperties.getLeavingDepth());
+ return getNodeCodePointCount() == mDicNodeProperties.getLeavingDepth();
+ }
+
+ AK_FORCE_INLINE bool isFirstLetter() const {
+ return getNodeCodePointCount() == 1;
+ }
+
+ bool isCached() const {
+ return mIsCachedForNextSuggestion;
+ }
+
+ void setCached() {
+ mIsCachedForNextSuggestion = true;
+ }
+
+ // Check if the current word and the previous word can be considered as a valid multiple word
+ // suggestion.
+ bool isValidMultipleWordSuggestion() const {
+ if (isBlacklistedOrNotAWord()) {
+ return false;
+ }
+ // Treat suggestion as invalid if the current and the previous word are single character
+ // words.
+ const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
+ - mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
+ const int currentWordLen = getNodeCodePointCount();
+ return (prevWordLen != 1 || currentWordLen != 1);
+ }
+
+ bool isFirstCharUppercase() const {
+ const int c = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(0);
+ return CharUtils::isAsciiUpper(c);
+ }
+
+ bool isCompletion(const int inputSize) const {
+ return mDicNodeState.mDicNodeStateInput.getInputIndex(0) >= inputSize;
+ }
+
+ bool canDoLookAheadCorrection(const int inputSize) const {
+ return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1;
+ }
+
+ // Used to get n-gram probability in DicNodeUtils.
+ int getPtNodePos() const {
+ return mDicNodeProperties.getPtNodePos();
+ }
+
+ // TODO: Use view class to return PtNodePos array.
+ const int *getPrevWordsTerminalPtNodePos() const {
+ return mDicNodeProperties.getPrevWordsTerminalPtNodePos();
+ }
+
+ // Used in DicNodeUtils
+ int getChildrenPtNodeArrayPos() const {
+ return mDicNodeProperties.getChildrenPtNodeArrayPos();
+ }
+
+ int getProbability() const {
+ return mDicNodeProperties.getProbability();
+ }
+
+ AK_FORCE_INLINE bool isTerminalDicNode() const {
+ const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
+ const int currentDicNodeDepth = getNodeCodePointCount();
+ const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth();
+ return isTerminalPtNode && currentDicNodeDepth > 0
+ && currentDicNodeDepth == terminalDicNodeDepth;
+ }
+
+ bool shouldBeFilteredBySafetyNetForBigram() const {
+ const uint16_t currentDepth = getNodeCodePointCount();
+ const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
+ - mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
+ return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
+ }
+
+ bool hasMatchedOrProximityCodePoints() const {
+ // This DicNode does not have matched or proximity code points when all code points have
+ // been handled as edit corrections or completion so far.
+ const int editCorrectionCount = mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount();
+ const int completionCount = mDicNodeState.mDicNodeStateScoring.getCompletionCount();
+ return (editCorrectionCount + completionCount) < getNodeCodePointCount();
+ }
+
+ bool isTotalInputSizeExceedingLimit() const {
+ // TODO: 3 can be 2? Needs to be investigated.
+ // TODO: Have a const variable for 3 (or 2)
+ return getTotalNodeCodePointCount() > MAX_WORD_LENGTH - 3;
+ }
+
+ void outputResult(int *dest) const {
+ memmove(dest, getOutputWordBuf(), getTotalNodeCodePointCount() * sizeof(dest[0]));
+ DUMP_WORD_AND_SCORE("OUTPUT");
+ }
+
+ // "Total" in this context (and other methods in this class) means the whole suggestion. When
+ // this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only
+ // the one that corresponds to the last word of the suggestion, and all the previous words
+ // are concatenated together in mDicNodeStateOutput.
+ int getTotalNodeSpaceCount() const {
+ if (!hasMultipleWords()) {
+ return 0;
+ }
+ return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStateOutput.getCodePointBuf(),
+ mDicNodeState.mDicNodeStateOutput.getPrevWordsLength());
+ }
+
+ int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const {
+ const int inputIndex = mDicNodeState.mDicNodeStateOutput.getSecondWordFirstInputIndex();
+ if (inputIndex == NOT_AN_INDEX) {
+ return NOT_AN_INDEX;
+ } else {
+ return pInfoState->getInputIndexOfSampledPoint(inputIndex);
+ }
+ }
+
+ bool hasMultipleWords() const {
+ return mDicNodeState.mDicNodeStateOutput.getPrevWordCount() > 0;
+ }
+
+ int getProximityCorrectionCount() const {
+ return mDicNodeState.mDicNodeStateScoring.getProximityCorrectionCount();
+ }
+
+ int getEditCorrectionCount() const {
+ return mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount();
+ }
+
+ // Used to prune nodes
+ float getNormalizedCompoundDistance() const {
+ return mDicNodeState.mDicNodeStateScoring.getNormalizedCompoundDistance();
+ }
+
+ // Used to prune nodes
+ float getNormalizedSpatialDistance() const {
+ return mDicNodeState.mDicNodeStateScoring.getSpatialDistance()
+ / static_cast<float>(getInputIndex(0) + 1);
+ }
+
+ // Used to prune nodes
+ float getCompoundDistance() const {
+ return mDicNodeState.mDicNodeStateScoring.getCompoundDistance();
+ }
+
+ // Used to prune nodes
+ float getCompoundDistance(const float languageWeight) const {
+ return mDicNodeState.mDicNodeStateScoring.getCompoundDistance(languageWeight);
+ }
+
+ AK_FORCE_INLINE const int *getOutputWordBuf() const {
+ return mDicNodeState.mDicNodeStateOutput.getCodePointBuf();
+ }
+
+ int getPrevCodePointG(int pointerId) const {
+ return mDicNodeState.mDicNodeStateInput.getPrevCodePoint(pointerId);
+ }
+
+ // Whether the current codepoint can be an intentional omission, in which case the traversal
+ // algorithm will always check for a possible omission here.
+ bool canBeIntentionalOmission() const {
+ return CharUtils::isIntentionalOmissionCodePoint(getNodeCodePoint());
+ }
+
+ // Whether the omission is so frequent that it should incur zero cost.
+ bool isZeroCostOmission() const {
+ // TODO: do not hardcode and read from header
+ return (getNodeCodePoint() == KEYCODE_SINGLE_QUOTE);
+ }
+
+ // TODO: remove
+ float getTerminalDiffCostG(int path) const {
+ return mDicNodeState.mDicNodeStateInput.getTerminalDiffCost(path);
+ }
+
+ //////////////////////
+ // Temporary getter //
+ // TODO: Remove //
+ //////////////////////
+ // TODO: Remove once touch path is merged into ProximityInfoState
+ // Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
+ int getNodeCodePoint() const {
+ const int codePoint = mDicNodeProperties.getDicNodeCodePoint();
+ const DigraphUtils::DigraphCodePointIndex digraphIndex =
+ mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
+ if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
+ return codePoint;
+ }
+ return DigraphUtils::getDigraphCodePointForIndex(codePoint, digraphIndex);
+ }
+
+ ////////////////////////////////
+ // Utils for cost calculation //
+ ////////////////////////////////
+ AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
+ return mDicNodeProperties.getDicNodeCodePoint()
+ == dicNode->mDicNodeProperties.getDicNodeCodePoint();
+ }
+
+ // TODO: remove
+ // TODO: rename getNextInputIndex
+ int16_t getInputIndex(int pointerId) const {
+ return mDicNodeState.mDicNodeStateInput.getInputIndex(pointerId);
+ }
+
+ ////////////////////////////////////
+ // Getter of features for scoring //
+ ////////////////////////////////////
+ float getSpatialDistanceForScoring() const {
+ return mDicNodeState.mDicNodeStateScoring.getSpatialDistance();
+ }
+
+ // For space-aware gestures, we store the normalized distance at the char index
+ // that ends the first word of the suggestion. We call this the distance after
+ // first word.
+ float getNormalizedCompoundDistanceAfterFirstWord() const {
+ return mDicNodeState.mDicNodeStateScoring.getNormalizedCompoundDistanceAfterFirstWord();
+ }
+
+ float getRawLength() const {
+ return mDicNodeState.mDicNodeStateScoring.getRawLength();
+ }
+
+ DoubleLetterLevel getDoubleLetterLevel() const {
+ return mDicNodeState.mDicNodeStateScoring.getDoubleLetterLevel();
+ }
+
+ void setDoubleLetterLevel(DoubleLetterLevel doubleLetterLevel) {
+ mDicNodeState.mDicNodeStateScoring.setDoubleLetterLevel(doubleLetterLevel);
+ }
+
+ bool isInDigraph() const {
+ return mDicNodeState.mDicNodeStateScoring.getDigraphIndex()
+ != DigraphUtils::NOT_A_DIGRAPH_INDEX;
+ }
+
+ void advanceDigraphIndex() {
+ mDicNodeState.mDicNodeStateScoring.advanceDigraphIndex();
+ }
+
+ ErrorTypeUtils::ErrorType getContainedErrorTypes() const {
+ return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes();
+ }
+
+ bool isBlacklistedOrNotAWord() const {
+ return mDicNodeProperties.isBlacklistedOrNotAWord();
+ }
+
+ inline uint16_t getNodeCodePointCount() const {
+ return mDicNodeProperties.getDepth();
+ }
+
+ // Returns code point count including spaces
+ inline uint16_t getTotalNodeCodePointCount() const {
+ return getNodeCodePointCount() + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
+ }
+
+ AK_FORCE_INLINE void dump(const char *tag) const {
+#if DEBUG_DICT
+ DUMP_WORD_AND_SCORE(tag);
+#if DEBUG_DUMP_ERROR
+ mProfiler.dump();
+#endif
+#endif
+ }
+
+ AK_FORCE_INLINE bool compare(const DicNode *right) const {
+ // Promote exact matches to prevent them from being pruned.
+ const bool leftExactMatch = ErrorTypeUtils::isExactMatch(getContainedErrorTypes());
+ const bool rightExactMatch = ErrorTypeUtils::isExactMatch(right->getContainedErrorTypes());
+ if (leftExactMatch != rightExactMatch) {
+ return leftExactMatch;
+ }
+ const float diff =
+ right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance();
+ static const float MIN_DIFF = 0.000001f;
+ if (diff > MIN_DIFF) {
+ return true;
+ } else if (diff < -MIN_DIFF) {
+ return false;
+ }
+ const int depth = getNodeCodePointCount();
+ const int depthDiff = right->getNodeCodePointCount() - depth;
+ if (depthDiff != 0) {
+ return depthDiff > 0;
+ }
+ for (int i = 0; i < depth; ++i) {
+ const int codePoint = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
+ const int rightCodePoint =
+ right->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
+ if (codePoint != rightCodePoint) {
+ return rightCodePoint > codePoint;
+ }
+ }
+ // Compare pointer values here for stable comparison
+ return this > right;
+ }
+
+ private:
+ DicNodeProperties mDicNodeProperties;
+ DicNodeState mDicNodeState;
+ // TODO: Remove
+ bool mIsCachedForNextSuggestion;
+
+ AK_FORCE_INLINE int getTotalInputIndex() const {
+ int index = 0;
+ for (int i = 0; i < MAX_POINTER_COUNT_G; i++) {
+ index += mDicNodeState.mDicNodeStateInput.getInputIndex(i);
+ }
+ return index;
+ }
+
+ // Caveat: Must not be called outside Weighting
+ // This restriction is guaranteed by "friend"
+ AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost,
+ const bool doNormalization, const int inputSize,
+ const ErrorTypeUtils::ErrorType errorType) {
+ if (DEBUG_GEO_FULL) {
+ LOGI_SHOW_ADD_COST_PROP;
+ }
+ mDicNodeState.mDicNodeStateScoring.addCost(spatialCost, languageCost, doNormalization,
+ inputSize, getTotalInputIndex(), errorType);
+ }
+
+ // Saves the current normalized compound distance for space-aware gestures.
+ // See getNormalizedCompoundDistanceAfterFirstWord for details.
+ AK_FORCE_INLINE void saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet() {
+ mDicNodeState.mDicNodeStateScoring.saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet();
+ }
+
+ // Caveat: Must not be called outside Weighting
+ // This restriction is guaranteed by "friend"
+ AK_FORCE_INLINE void forwardInputIndex(const int pointerId, const int count,
+ const bool overwritesPrevCodePointByNodeCodePoint) {
+ if (count == 0) {
+ return;
+ }
+ mDicNodeState.mDicNodeStateInput.forwardInputIndex(pointerId, count);
+ if (overwritesPrevCodePointByNodeCodePoint) {
+ mDicNodeState.mDicNodeStateInput.setPrevCodePoint(0, getNodeCodePoint());
+ }
+ }
+
+ AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) {
+ if (mDicNodeState.mDicNodeStateOutput.getPrevWordCount() == 1 && isFirstLetter()) {
+ mDicNodeState.mDicNodeStateOutput.setSecondWordFirstInputIndex(
+ inputStateG->mInputIndex);
+ }
+ mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId,
+ inputStateG->mInputIndex, inputStateG->mPrevCodePoint,
+ inputStateG->mTerminalDiffCost, inputStateG->mRawLength);
+ mDicNodeState.mDicNodeStateScoring.addRawLength(inputStateG->mRawLength);
+ mDicNodeState.mDicNodeStateScoring.setDoubleLetterLevel(inputStateG->mDoubleLetterLevel);
+ }
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/dic_node_pool.h b/third_party/android_prediction/suggest/core/dicnode/dic_node_pool.h
new file mode 100644
index 0000000..08d773c
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/dic_node_pool.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_POOL_H
+#define LATINIME_DIC_NODE_POOL_H
+
+#include <deque>
+#include <unordered_set>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+
+namespace latinime {
+
+class DicNodePool {
+ public:
+ explicit DicNodePool(const int capacity) : mDicNodes(), mPooledDicNodes() {
+ reset(capacity);
+ }
+
+    void reset(const int capacity) {
+        if (capacity == static_cast<int>(mDicNodes.size())
+                && capacity == static_cast<int>(mPooledDicNodes.size())) {
+            // Already the requested size and the free list is full: nothing to do.
+            return;
+        }
+        mDicNodes.resize(capacity);  // May reallocate; earlier DicNode pointers go stale.
+        mDicNodes.shrink_to_fit();
+        mPooledDicNodes.clear();  // Rebuild the free list so that it holds every node.
+        for (auto &dicNode : mDicNodes) {
+            mPooledDicNodes.emplace_back(&dicNode);
+        }
+    }
+
+    // Get a free DicNode, or nullptr if the pool is empty. Release it with placeBackInstance().
+ DicNode *getInstance() {
+ if (mPooledDicNodes.empty()) {
+ return nullptr;
+ }
+ DicNode *const dicNode = mPooledDicNodes.back();
+ mPooledDicNodes.pop_back();
+ return dicNode;
+ }
+
+    // Put an instance obtained from getInstance() back into the pool. The caller must not use
+    // it afterwards unless getInstance() hands it out again.
+ void placeBackInstance(DicNode *dicNode) {
+ mPooledDicNodes.emplace_back(dicNode);
+ }
+
+ void dump() const {
+ AKLOGI("\n\n\n\n\n===========================");
+ std::unordered_set<const DicNode*> usedDicNodes;
+ for (const auto &dicNode : mDicNodes) {
+ usedDicNodes.insert(&dicNode);
+ }
+ for (const auto &dicNodePtr : mPooledDicNodes) {
+ usedDicNodes.erase(dicNodePtr);
+ }
+ for (const auto &usedDicNodePtr : usedDicNodes) {
+ usedDicNodePtr->dump("DIC_NODE_POOL: ");
+ }
+ AKLOGI("===========================\n\n\n\n\n");
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodePool);
+
+ std::vector<DicNode> mDicNodes;
+ std::deque<DicNode*> mPooledDicNodes;
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_POOL_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/dic_node_priority_queue.h b/third_party/android_prediction/suggest/core/dicnode/dic_node_priority_queue.h
new file mode 100644
index 0000000..a9ef339
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/dic_node_priority_queue.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_PRIORITY_QUEUE_H
+#define LATINIME_DIC_NODE_PRIORITY_QUEUE_H
+
+#include <algorithm>
+#include <queue>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_pool.h"
+
+namespace latinime {
+
+class DicNodePriorityQueue {
+ public:
+ AK_FORCE_INLINE explicit DicNodePriorityQueue(const int capacity)
+ : mMaxSize(capacity), mDicNodesQueue(), mDicNodePool(capacity) {
+ clear();
+ }
+
+ // Non virtual inline destructor -- never inherit this class
+ AK_FORCE_INLINE ~DicNodePriorityQueue() {}
+
+ AK_FORCE_INLINE int getSize() const {
+ return static_cast<int>(mDicNodesQueue.size());
+ }
+
+ AK_FORCE_INLINE int getMaxSize() const {
+ return mMaxSize;
+ }
+
+ AK_FORCE_INLINE void setMaxSize(const int maxSize) {
+ mMaxSize = maxSize;
+ }
+
+ AK_FORCE_INLINE void clear() {
+ clearAndResize(mMaxSize);
+ }
+
+    AK_FORCE_INLINE void clearAndResize(const int maxSize) {
+        mMaxSize = maxSize;
+        while (!mDicNodesQueue.empty()) {
+            mDicNodesQueue.pop();
+        }
+        mDicNodePool.reset(mMaxSize + 1);  // +1: copyPush() takes a node before it evicts one.
+    }
+
+    AK_FORCE_INLINE void copyPush(const DicNode *const dicNode) {
+        DicNode *const pooledDicNode = newDicNode(dicNode);  // Pooled copy of |dicNode|.
+        if (!pooledDicNode) {
+            return;  // Pool exhausted: the candidate is dropped.
+        }
+        if (getSize() < mMaxSize) {
+            mDicNodesQueue.push(pooledDicNode);
+            return;
+        }
+        if (betterThanWorstDicNode(pooledDicNode)) {  // Queue full: replace its top node.
+            mDicNodePool.placeBackInstance(mDicNodesQueue.top());
+            mDicNodesQueue.pop();
+            mDicNodesQueue.push(pooledDicNode);
+            return;
+        }
+        mDicNodePool.placeBackInstance(pooledDicNode);  // Not better: recycle the candidate.
+    }
+
+    AK_FORCE_INLINE void copyPop(DicNode *const dest) {
+        if (mDicNodesQueue.empty()) {
+            ASSERT(false);
+            return;
+        }
+        DicNode *node = mDicNodesQueue.top();
+        if (dest) {  // A null |dest| discards the top node without copying it.
+            DicNodeUtils::initByCopy(node, dest);
+        }
+        mDicNodePool.placeBackInstance(node);  // The popped node becomes available again.
+        mDicNodesQueue.pop();
+    }
+
+ AK_FORCE_INLINE void dump() {
+ mDicNodePool.dump();
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodePriorityQueue);
+
+ AK_FORCE_INLINE static bool compareDicNode(const DicNode *const left,
+ const DicNode *const right) {
+ return left->compare(right);
+ }
+
+ struct DicNodeComparator {
+ bool operator ()(const DicNode *left, const DicNode *right) const {
+ return compareDicNode(left, right);
+ }
+ };
+
+ typedef std::priority_queue<DicNode *, std::vector<DicNode *>, DicNodeComparator> DicNodesQueue;
+ int mMaxSize;
+ DicNodesQueue mDicNodesQueue;
+ DicNodePool mDicNodePool;
+
+    AK_FORCE_INLINE bool betterThanWorstDicNode(const DicNode *const dicNode) const {
+        DicNode *worstNode = mDicNodesQueue.top();  // Precondition: the queue is not empty.
+        if (!worstNode) {
+            return true;
+        }
+        return compareDicNode(dicNode, worstNode);
+    }
+
+ AK_FORCE_INLINE DicNode *newDicNode(const DicNode *const dicNode) {
+ DicNode *newNode = mDicNodePool.getInstance();
+ if (newNode) {
+ DicNodeUtils::initByCopy(dicNode, newNode);
+ }
+ return newNode;
+ }
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_PRIORITY_QUEUE_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/dic_node_profiler.h b/third_party/android_prediction/suggest/core/dicnode/dic_node_profiler.h
new file mode 100644
index 0000000..0ab38ef
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/dic_node_profiler.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_PROFILER_H
+#define LATINIME_DIC_NODE_PROFILER_H
+
+#include "third_party/android_prediction/defines.h"
+
+#if DEBUG_DICT
+#define PROF_SPACE_SUBSTITUTION(profiler) profiler.profSpaceSubstitution()
+#define PROF_SPACE_OMISSION(profiler) profiler.profSpaceOmission()
+#define PROF_ADDITIONAL_PROXIMITY(profiler) profiler.profAdditionalProximity()
+#define PROF_SUBSTITUTION(profiler) profiler.profSubstitution()
+#define PROF_OMISSION(profiler) profiler.profOmission()
+#define PROF_INSERTION(profiler) profiler.profInsertion()
+#define PROF_MATCH(profiler) profiler.profMatch()
+#define PROF_COMPLETION(profiler) profiler.profCompletion()
+#define PROF_TRANSPOSITION(profiler) profiler.profTransposition()
+#define PROF_NEARESTKEY(profiler) profiler.profNearestKey()
+#define PROF_TERMINAL(profiler) profiler.profTerminal()
+#define PROF_TERMINAL_INSERTION(profiler) profiler.profTerminalInsertion()
+#define PROF_NEW_WORD(profiler) profiler.profNewWord()
+#define PROF_NEW_WORD_BIGRAM(profiler) profiler.profNewWordBigram()
+#define PROF_NODE_RESET(profiler) profiler.reset()
+#define PROF_NODE_COPY(src, dest) dest.copy(src)
+#else
+#define PROF_SPACE_SUBSTITUTION(profiler)
+#define PROF_SPACE_OMISSION(profiler)
+#define PROF_ADDITIONAL_PROXIMITY(profiler)
+#define PROF_SUBSTITUTION(profiler)
+#define PROF_OMISSION(profiler)
+#define PROF_INSERTION(profiler)
+#define PROF_MATCH(profiler)
+#define PROF_COMPLETION(profiler)
+#define PROF_TRANSPOSITION(profiler)
+#define PROF_NEARESTKEY(profiler)
+#define PROF_TERMINAL(profiler)
+#define PROF_TERMINAL_INSERTION(profiler)
+#define PROF_NEW_WORD(profiler)
+#define PROF_NEW_WORD_BIGRAM(profiler)
+#define PROF_NODE_RESET(profiler)
+#define PROF_NODE_COPY(src, dest)
+#endif
+
+namespace latinime {
+
+class DicNodeProfiler {
+ public:
+#if DEBUG_DICT
+ AK_FORCE_INLINE DicNodeProfiler()
+ : mProfOmission(0), mProfInsertion(0), mProfTransposition(0),
+ mProfAdditionalProximity(0), mProfSubstitution(0),
+ mProfSpaceSubstitution(0), mProfSpaceOmission(0),
+ mProfMatch(0), mProfCompletion(0), mProfTerminal(0), mProfTerminalInsertion(0),
+ mProfNearestKey(0), mProfNewWord(0), mProfNewWordBigram(0) {}
+
+ int mProfOmission;
+ int mProfInsertion;
+ int mProfTransposition;
+ int mProfAdditionalProximity;
+ int mProfSubstitution;
+ int mProfSpaceSubstitution;
+ int mProfSpaceOmission;
+ int mProfMatch;
+ int mProfCompletion;
+ int mProfTerminal;
+ int mProfTerminalInsertion;
+ int mProfNearestKey;
+ int mProfNewWord;
+ int mProfNewWordBigram;
+
+ void profSpaceSubstitution() {
+ ++mProfSpaceSubstitution;
+ }
+
+ void profSpaceOmission() {
+ ++mProfSpaceOmission;
+ }
+
+ void profAdditionalProximity() {
+ ++mProfAdditionalProximity;
+ }
+
+ void profSubstitution() {
+ ++mProfSubstitution;
+ }
+
+ void profOmission() {
+ ++mProfOmission;
+ }
+
+ void profInsertion() {
+ ++mProfInsertion;
+ }
+
+ void profMatch() {
+ ++mProfMatch;
+ }
+
+ void profCompletion() {
+ ++mProfCompletion;
+ }
+
+ void profTransposition() {
+ ++mProfTransposition;
+ }
+
+ void profNearestKey() {
+ ++mProfNearestKey;
+ }
+
+ void profTerminal() {
+ ++mProfTerminal;
+ }
+
+ void profTerminalInsertion() {
+ ++mProfTerminalInsertion;
+ }
+
+ void profNewWord() {
+ ++mProfNewWord;
+ }
+
+ void profNewWordBigram() {
+ ++mProfNewWordBigram;
+ }
+
+    // Zeroes every counter (invoked through PROF_NODE_RESET). Related counters share a
+    // line. mProfTerminalInsertion has to be cleared too, or it survives the reset.
+    void reset() {
+        mProfSpaceSubstitution = mProfSpaceOmission = 0;
+        mProfAdditionalProximity = 0;
+        mProfSubstitution = 0;
+        mProfOmission = 0;
+        mProfInsertion = 0;
+        mProfMatch = 0;
+        mProfCompletion = 0;
+        mProfTransposition = 0;
+        mProfNearestKey = 0;
+        mProfTerminal = mProfTerminalInsertion = 0;
+        mProfNewWord = mProfNewWordBigram = 0;
+    }
+
+    // Copies all counters from |profiler| into this object (invoked through
+    // PROF_NODE_COPY).
+    //
+    // This delegates to the implicitly generated copy assignment instead of
+    // listing members by hand. Every member is a plain int counter, so the
+    // memberwise copy the compiler generates is exactly what is wanted, and it
+    // cannot fall out of step when a counter is added. A hand-written list can:
+    // leaving mProfTerminalInsertion out of it would let the destination keep
+    // its old terminal-insertion count after a copy.
+    //
+    // |profiler| is dereferenced unconditionally and therefore must not be
+    // null. Copying an object onto itself is harmless.
+    void copy(const DicNodeProfiler *const profiler) {
+        *this = *profiler;
+    }
+
+ void dump() const {
+ AKLOGI("O %d, I %d, T %d, AP %d, S %d, SS %d, SO %d, M %d, C %d, TE %d, NW = %d, NWB = %d",
+ mProfOmission, mProfInsertion, mProfTransposition, mProfAdditionalProximity,
+ mProfSubstitution, mProfSpaceSubstitution, mProfSpaceOmission, mProfMatch,
+ mProfCompletion, mProfTerminal, mProfNewWord, mProfNewWordBigram);
+ }
+#else
+ DicNodeProfiler() {}
+#endif
+ private:
+    // Caution!!!
+    // This class relies on the implicit copy constructor and copy assignment operator;
+    // shallow copies are fine because it owns no resources.
+};
+}
+#endif // LATINIME_DIC_NODE_PROFILER_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/dic_node_utils.cpp b/third_party/android_prediction/suggest/core/dicnode/dic_node_utils.cpp
new file mode 100644
index 0000000..d8d2674
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/dic_node_utils.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_utils.h"
+
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_vector.h"
+#include "third_party/android_prediction/suggest/core/dictionary/multi_bigram_map.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+
+namespace latinime {
+
+///////////////////////////////
+// Node initialization utils //
+///////////////////////////////
+
+/* static */ void DicNodeUtils::initAsRoot(  // Roots newRootDicNode at the policy's root position.
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int *const prevWordsPtNodePos, DicNode *const newRootDicNode) {
+ newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordsPtNodePos);
+}  // prevWordsPtNodePos is passed through unchanged to DicNode::initAsRoot().
+
+/* static */ void DicNodeUtils::initAsRootWithPreviousWord(  // New root following a previous word.
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
+ newRootDicNode->initAsRootWithPreviousWord(
+ prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
+}  // Forwards the previous word's last node and the policy's root position to the DicNode.
+
+/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
+ DicNode *const destDicNode) {  // Thin wrapper over DicNode::initByCopy(), called on destDicNode.
+ destDicNode->initByCopy(srcDicNode);
+}
+
+///////////////////////////////////
+// Traverse node expansion utils //
+///////////////////////////////////
+/* static */ void DicNodeUtils::getAllChildDicNodes(const DicNode *dicNode,
+        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+        DicNodeVector *const childDicNodes) {
+    if (dicNode->isTotalInputSizeExceedingLimit()) {
+        return;  // Over the input size limit: no children are produced.
+    }
+    if (dicNode->isLeavingNode()) {
+        dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
+    } else {
+        childDicNodes->pushPassingChild(dicNode);  // Not leaving yet: a single passing child.
+    }
+}
+
+///////////////////
+// Scoring utils //
+///////////////////
+/**
+ * Bigram/unigram cost of dicNode; MAX_VALUE_FOR_WEIGHTING for an invalid multi-word suggestion.
+ */
+/* static */ float DicNodeUtils::getBigramNodeImprobability(
+        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+        const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+    const bool isInvalidMultiWord =
+            dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion();
+    if (isInvalidMultiWord) {
+        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+    }
+    // TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
+    const int shortfall = MAX_PROBABILITY
+            - getBigramNodeProbability(dictionaryStructurePolicy, dicNode, multiBigramMap);
+    return static_cast<float>(shortfall) / static_cast<float>(MAX_PROBABILITY);
+}
+
+/* static */ int DicNodeUtils::getBigramNodeProbability(
+        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+        const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+    const int unigramProbability = dicNode->getProbability();
+    if (!multiBigramMap) {
+        // No bigram map was supplied: ask the policy with NOT_A_PROBABILITY as the bigram part.
+        return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
+    }
+    // The previous words' terminal positions are the context for the bigram lookup.
+    return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
+            dicNode->getPrevWordsTerminalPtNodePos(), dicNode->getPtNodePos(), unigramProbability);
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/dicnode/dic_node_utils.h b/third_party/android_prediction/suggest/core/dicnode/dic_node_utils.h
new file mode 100644
index 0000000..fadad5f
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/dic_node_utils.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_UTILS_H
+#define LATINIME_DIC_NODE_UTILS_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+class DictionaryStructureWithBufferPolicy;
+class MultiBigramMap;
+
+class DicNodeUtils {  // Static-only helpers to initialize, expand and score DicNodes.
+ public:
+    static void initAsRoot(  // Initializes newRootDicNode at the policy's root position.
+            const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+            const int *const prevWordsPtNodePos, DicNode *const newRootDicNode);
+    static void initAsRootWithPreviousWord(  // Root that continues after prevWordLastDicNode.
+            const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+            const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
+    static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
+    static void getAllChildDicNodes(const DicNode *dicNode,  // Appends to childDicNodes.
+            const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+            DicNodeVector *const childDicNodes);
+    static float getBigramNodeImprobability(  // multiBigramMap may be null.
+            const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+            const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
+    // Max number of bigrams to look up
+    static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500;
+
+    static int getBigramNodeProbability(  // Helper for getBigramNodeImprobability().
+            const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+            const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_UTILS_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/dic_node_vector.h b/third_party/android_prediction/suggest/core/dicnode/dic_node_vector.h
new file mode 100644
index 0000000..0056407
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/dic_node_vector.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_VECTOR_H
+#define LATINIME_DIC_NODE_VECTOR_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+
+namespace latinime {
+
+class DicNodeVector {  // Vector of DicNodes; pushes assert that it has not been locked.
+ public:
+#ifdef FLAG_DBG
+    // A size of 0 will cause the vector to be resized.
+    static const int DEFAULT_NODES_SIZE_FOR_OPTIMIZATION = 0;
+#else
+    static const int DEFAULT_NODES_SIZE_FOR_OPTIMIZATION = 60;
+#endif
+    AK_FORCE_INLINE DicNodeVector() : mDicNodes(), mLock(false) {}
+
+    // Specify the capacity of the vector
+    AK_FORCE_INLINE DicNodeVector(const int size) : mDicNodes(), mLock(false) {
+        mDicNodes.reserve(size);
+    }
+
+    // Non virtual inline destructor -- never inherit this class
+    AK_FORCE_INLINE ~DicNodeVector() {}
+
+    AK_FORCE_INLINE void clear() {  // Removes all nodes and releases the lock.
+        mDicNodes.clear();
+        mLock = false;
+    }
+
+    int getSizeAndLock() {  // Returns the node count; later pushes assert until clear().
+        mLock = true;
+        return static_cast<int>(mDicNodes.size());
+    }
+
+    void pushPassingChild(const DicNode *dicNode) {
+        ASSERT(!mLock);
+        mDicNodes.emplace_back();  // Default-construct in place, then initialize the new node.
+        mDicNodes.back().initAsPassingChild(dicNode);
+    }
+
+    void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
+            const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+            const bool hasChildren, const bool isBlacklistedOrNotAWord,
+            const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
+        ASSERT(!mLock);
+        mDicNodes.emplace_back();  // Same two-step construction as pushPassingChild().
+        mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
+                isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
+                mergedNodeCodePoints);
+    }
+
+    DicNode *operator[](const int id) {  // Pointer to node id; asserts 0 <= id < size.
+        ASSERT(0 <= id && id < static_cast<int>(mDicNodes.size()));  // id is signed.
+        return &mDicNodes[id];
+    }
+
+    DicNode *front() {  // Pointer to the first node; asserts the vector is not empty.
+        ASSERT(1 <= static_cast<int>(mDicNodes.size()));
+        return &mDicNodes.front();
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(DicNodeVector);
+    std::vector<DicNode> mDicNodes;
+    bool mLock;  // Set by getSizeAndLock(), cleared by clear(); checked only through ASSERT.
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_VECTOR_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/dic_nodes_cache.cpp b/third_party/android_prediction/suggest/core/dicnode/dic_nodes_cache.cpp
new file mode 100644
index 0000000..ef919ef
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/dic_nodes_cache.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <list>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_priority_queue.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_utils.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_nodes_cache.h"
+
+namespace latinime {
+
+// The biggest value among MAX_CACHE_DIC_NODE_SIZE, MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT, ...
+const int DicNodesCache::LARGE_PRIORITY_QUEUE_CAPACITY = 310;
+// Smaller capacity for reducing the memory footprint; getCacheCapacity() picks between the two.
+const int DicNodesCache::SMALL_PRIORITY_QUEUE_CAPACITY = 100;
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/dicnode/dic_nodes_cache.h b/third_party/android_prediction/suggest/core/dicnode/dic_nodes_cache.h
new file mode 100644
index 0000000..52e8ba7
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/dic_nodes_cache.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODES_CACHE_H
+#define LATINIME_DIC_NODES_CACHE_H
+
+#include <algorithm>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_priority_queue.h"
+
+namespace latinime {
+
+class DicNode;
+
+/**
+ * Controls the DicNode search priority queues (active, next active, cached, terminal).
+ */
+class DicNodesCache {
+ public:
+ AK_FORCE_INLINE explicit DicNodesCache(const bool usesLargeCapacityCache)
+ : mUsesLargeCapacityCache(usesLargeCapacityCache),
+ mDicNodePriorityQueue0(getCacheCapacity()),  // Flag is declared first, so it is already set.
+ mDicNodePriorityQueue1(getCacheCapacity()),
+ mDicNodePriorityQueue2(getCacheCapacity()),
+ mDicNodePriorityQueueForTerminal(MAX_RESULTS),
+ mActiveDicNodes(&mDicNodePriorityQueue0),
+ mNextActiveDicNodes(&mDicNodePriorityQueue1),
+ mCachedDicNodesForContinuousSuggestion(&mDicNodePriorityQueue2),
+ mTerminalDicNodes(&mDicNodePriorityQueueForTerminal),
+ mInputIndex(0), mLastCachedInputIndex(0) {}
+
+ AK_FORCE_INLINE virtual ~DicNodesCache() {}
+
+ AK_FORCE_INLINE void reset(const int nextActiveSize, const int terminalSize) {
+ mInputIndex = 0;
+ mLastCachedInputIndex = 0;
+ // The size of the current active DicNode queue doesn't have to be changed.
+ mActiveDicNodes->clear();
+ // nextActiveSize is used to limit the next iteration's active DicNode size.
+ const int nextActiveSizeFittingToTheCapacity = std::min(nextActiveSize, getCacheCapacity());
+ mNextActiveDicNodes->clearAndResize(nextActiveSizeFittingToTheCapacity);
+ mTerminalDicNodes->clearAndResize(terminalSize);
+ // The size of the cached DicNode queue doesn't have to be changed.
+ mCachedDicNodesForContinuousSuggestion->clear();
+ }
+
+ AK_FORCE_INLINE void continueSearch() {
+ resetTemporaryCaches();  // Clears the active, next-active and terminal queues.
+ restoreActiveDicNodesFromCache();  // The cached queue becomes the active queue.
+ }
+
+ AK_FORCE_INLINE void advanceActiveDicNodes() {
+ if (DEBUG_DICT) {
+ AKLOGI("Advance active %d nodes.", mNextActiveDicNodes->getSize());
+ }
+ if (DEBUG_DICT_FULL) {
+ mNextActiveDicNodes->dump();
+ }
+ mNextActiveDicNodes =  // Next-active becomes active; the emptied old active queue is reused.
+ moveNodesAndReturnReusableEmptyQueue(mNextActiveDicNodes, &mActiveDicNodes);
+ }
+
+ int activeSize() const { return mActiveDicNodes->getSize(); }
+ int terminalSize() const { return mTerminalDicNodes->getSize(); }
+ bool isLookAheadCorrectionInputIndex(const int inputIndex) const {
+ return inputIndex == mInputIndex - 1;  // True for the index just before the current one.
+ }
+ void advanceInputIndex(const int inputSize) {
+ if (mInputIndex < inputSize) {  // The index is never advanced past inputSize.
+ mInputIndex++;
+ }
+ }
+
+ AK_FORCE_INLINE void copyPushTerminal(DicNode *dicNode) {
+ mTerminalDicNodes->copyPush(dicNode);
+ }
+
+ AK_FORCE_INLINE void copyPushActive(DicNode *dicNode) {
+ mActiveDicNodes->copyPush(dicNode);
+ }
+
+ AK_FORCE_INLINE void copyPushContinue(DicNode *dicNode) {
+ mCachedDicNodesForContinuousSuggestion->copyPush(dicNode);
+ }
+
+ AK_FORCE_INLINE void copyPushNextActive(DicNode *dicNode) {
+ mNextActiveDicNodes->copyPush(dicNode);
+ }
+
+ void popTerminal(DicNode *dest) {
+ mTerminalDicNodes->copyPop(dest);
+ }
+
+ void popActive(DicNode *dest) {
+ mActiveDicNodes->copyPop(dest);
+ }
+
+ bool hasCachedDicNodesForContinuousSuggestion() const {
+ return mCachedDicNodesForContinuousSuggestion
+ && mCachedDicNodesForContinuousSuggestion->getSize() > 0;
+ }
+
+ AK_FORCE_INLINE bool isCacheBorderForTyping(const int inputSize) const {
+ // TODO: Move this variable to header
+ static const int CACHE_BACK_LENGTH = 3;
+ const int cacheInputIndex = inputSize - CACHE_BACK_LENGTH;
+ const bool shouldCache = (cacheInputIndex == mInputIndex)  // At the border index...
+ && (cacheInputIndex != mLastCachedInputIndex);  // ...and not already recorded as cached.
+ return shouldCache;
+ }
+
+ AK_FORCE_INLINE void updateLastCachedInputIndex() {
+ mLastCachedInputIndex = mInputIndex;
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DicNodesCache);
+
+ AK_FORCE_INLINE void restoreActiveDicNodesFromCache() {
+ if (DEBUG_DICT) {
+ AKLOGI("Restore %d nodes. inputIndex = %d.",
+ mCachedDicNodesForContinuousSuggestion->getSize(), mLastCachedInputIndex);
+ }
+ if (DEBUG_DICT_FULL || DEBUG_CACHE) {
+ mCachedDicNodesForContinuousSuggestion->dump();
+ }
+ mInputIndex = mLastCachedInputIndex;  // Resume from the index recorded at caching time.
+ mCachedDicNodesForContinuousSuggestion = moveNodesAndReturnReusableEmptyQueue(
+ mCachedDicNodesForContinuousSuggestion, &mActiveDicNodes);
+ }
+
+ AK_FORCE_INLINE static DicNodePriorityQueue *moveNodesAndReturnReusableEmptyQueue(
+ DicNodePriorityQueue *src, DicNodePriorityQueue **dest) {
+ const int srcMaxSize = src->getMaxSize();
+ const int destMaxSize = (*dest)->getMaxSize();
+ DicNodePriorityQueue *tmp = *dest;
+ *dest = src;  // No node is copied: only the queue pointers change hands.
+ (*dest)->setMaxSize(destMaxSize);  // The moved queue takes over the destination's max size.
+ tmp->clearAndResize(srcMaxSize);  // The old destination is emptied, sized like the source.
+ return tmp;
+ }
+
+ AK_FORCE_INLINE int getCacheCapacity() const {
+ return mUsesLargeCapacityCache ?
+ LARGE_PRIORITY_QUEUE_CAPACITY : SMALL_PRIORITY_QUEUE_CAPACITY;
+ }
+
+ AK_FORCE_INLINE void resetTemporaryCaches() {
+ mActiveDicNodes->clear();
+ mNextActiveDicNodes->clear();
+ mTerminalDicNodes->clear();
+ }
+
+ static const int LARGE_PRIORITY_QUEUE_CAPACITY;
+ static const int SMALL_PRIORITY_QUEUE_CAPACITY;
+
+ const bool mUsesLargeCapacityCache;  // Must stay declared before the queues (see constructor).
+ // Queue instances; the pointers below are re-pointed among them as the search advances.
+ DicNodePriorityQueue mDicNodePriorityQueue0;
+ DicNodePriorityQueue mDicNodePriorityQueue1;
+ DicNodePriorityQueue mDicNodePriorityQueue2;
+ DicNodePriorityQueue mDicNodePriorityQueueForTerminal;
+
+ // Active dicNodes currently being expanded.
+ DicNodePriorityQueue *mActiveDicNodes;
+ // Next dicNodes to be expanded.
+ DicNodePriorityQueue *mNextActiveDicNodes;
+ // Cached dicNodes used for continuous suggestion.
+ DicNodePriorityQueue *mCachedDicNodesForContinuousSuggestion;
+ // Current top terminal dicNodes.
+ DicNodePriorityQueue *mTerminalDicNodes;
+ int mInputIndex;
+ int mLastCachedInputIndex;  // Value of mInputIndex at the last updateLastCachedInputIndex().
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODES_CACHE_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_properties.h b/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_properties.h
new file mode 100644
index 0000000..1326431
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_properties.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_PROPERTIES_H
+#define LATINIME_DIC_NODE_PROPERTIES_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+/**
+ * PtNode information related to the DicNode from the lexicon trie.
+ */
+class DicNodeProperties {
+ public:
+ AK_FORCE_INLINE DicNodeProperties()  // mPrevWordsTerminalPtNodePos is left uninitialized here.
+ : mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS),
+ mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT),
+ mIsTerminal(false), mHasChildrenPtNodes(false),
+ mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
+
+ ~DicNodeProperties() {}
+
+ // Should be called only once each time a DicNode is initialized.
+ void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
+ const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordsNodePos) {
+ mPtNodePos = pos;
+ mChildrenPtNodeArrayPos = childrenPos;
+ mDicNodeCodePoint = nodeCodePoint;
+ mProbability = probability;
+ mIsTerminal = isTerminal;
+ mHasChildrenPtNodes = hasChildren;
+ mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
+ mDepth = depth;
+ mLeavingDepth = leavingDepth;
+ memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos));
+ }  // The memmove reads MAX_PREV_WORD_COUNT_FOR_N_GRAM ints from prevWordsNodePos.
+
+ // Init for root with prevWordsPtNodePos which is used for n-gram
+ void init(const int rootPtNodeArrayPos, const int *const prevWordsNodePos) {
+ mPtNodePos = NOT_A_DICT_POS;
+ mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
+ mDicNodeCodePoint = NOT_A_CODE_POINT;
+ mProbability = NOT_A_PROBABILITY;
+ mIsTerminal = false;
+ mHasChildrenPtNodes = true;
+ mIsBlacklistedOrNotAWord = false;
+ mDepth = 0;
+ mLeavingDepth = 0;
+ memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos));
+ }  // As above, the whole previous-words array is copied from prevWordsNodePos.
+
+ void initByCopy(const DicNodeProperties *const dicNodeProp) {  // Field-by-field copy.
+ mPtNodePos = dicNodeProp->mPtNodePos;
+ mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+ mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
+ mProbability = dicNodeProp->mProbability;
+ mIsTerminal = dicNodeProp->mIsTerminal;
+ mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+ mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = dicNodeProp->mDepth;
+ mLeavingDepth = dicNodeProp->mLeavingDepth;
+ memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos,
+ sizeof(mPrevWordsTerminalPtNodePos));
+ }
+
+ // Init as passing child: a copy of dicNodeProp except for the code point and the depth.
+ void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
+ mPtNodePos = dicNodeProp->mPtNodePos;
+ mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+ mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
+ mProbability = dicNodeProp->mProbability;
+ mIsTerminal = dicNodeProp->mIsTerminal;
+ mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+ mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
+ mLeavingDepth = dicNodeProp->mLeavingDepth;
+ memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos,
+ sizeof(mPrevWordsTerminalPtNodePos));
+ }
+
+ int getPtNodePos() const {
+ return mPtNodePos;
+ }
+
+ int getChildrenPtNodeArrayPos() const {
+ return mChildrenPtNodeArrayPos;
+ }
+
+ int getProbability() const {
+ return mProbability;
+ }
+
+ int getDicNodeCodePoint() const {
+ return mDicNodeCodePoint;
+ }
+
+ uint16_t getDepth() const {
+ return mDepth;
+ }
+
+ // TODO: Move to output?
+ uint16_t getLeavingDepth() const {
+ return mLeavingDepth;
+ }
+
+ bool isTerminal() const {
+ return mIsTerminal;
+ }
+
+ bool hasChildren() const {
+ return mHasChildrenPtNodes || mDepth != mLeavingDepth;  // Also true while depths differ.
+ }
+
+ bool isBlacklistedOrNotAWord() const {
+ return mIsBlacklistedOrNotAWord;
+ }
+
+ const int *getPrevWordsTerminalPtNodePos() const {  // Points at the member array; not a copy.
+ return mPrevWordsTerminalPtNodePos;
+ }
+
+ private:
+ // Caution!!!
+ // Use the default copy constructor and assignment operator because shallow copies are ok
+ // for this class
+ int mPtNodePos;
+ int mChildrenPtNodeArrayPos;
+ int mProbability;
+ int mDicNodeCodePoint;
+ bool mIsTerminal;
+ bool mHasChildrenPtNodes;
+ bool mIsBlacklistedOrNotAWord;
+ uint16_t mDepth;
+ uint16_t mLeavingDepth;
+ int mPrevWordsTerminalPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_PROPERTIES_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state.h b/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state.h
new file mode 100644
index 0000000..6f7e721
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_STATE_H
+#define LATINIME_DIC_NODE_STATE_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_input.h"
+#include "third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_output.h"
+#include "third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_scoring.h"
+
+namespace latinime {
+
+class DicNodeState {  // Bundles the input, output and scoring state of one DicNode.
+ public:
+ DicNodeStateInput mDicNodeStateInput;
+ DicNodeStateOutput mDicNodeStateOutput;
+ DicNodeStateScoring mDicNodeStateScoring;
+
+ AK_FORCE_INLINE DicNodeState()
+ : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {}
+
+ ~DicNodeState() {}
+
+ DicNodeState &operator=(const DicNodeState& src) {  // Assignment is routed via initByCopy().
+ initByCopy(&src);
+ return *this;
+ }
+
+ DicNodeState(const DicNodeState& src)  // Default-constructs the parts, then copies from src.
+ : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {
+ initByCopy(&src);
+ }
+
+ // Init for root
+ void init() {
+ mDicNodeStateInput.init();
+ mDicNodeStateOutput.init();
+ mDicNodeStateScoring.init();
+ }
+
+ // Init with previous word.
+ void initAsRootWithPreviousWord(const DicNodeState *prevWordDicNodeState,
+ const int prevWordCodePointCount) {  // prevWordCodePointCount is not used in this body.
+ mDicNodeStateOutput.init(&prevWordDicNodeState->mDicNodeStateOutput);
+ mDicNodeStateInput.init(
+ &prevWordDicNodeState->mDicNodeStateInput, true /* resetTerminalDiffCost */);
+ mDicNodeStateScoring.initByCopy(&prevWordDicNodeState->mDicNodeStateScoring);
+ }
+
+ // Init by copy
+ AK_FORCE_INLINE void initByCopy(const DicNodeState *const src) {
+ mDicNodeStateInput.initByCopy(&src->mDicNodeStateInput);
+ mDicNodeStateOutput.initByCopy(&src->mDicNodeStateOutput);
+ mDicNodeStateScoring.initByCopy(&src->mDicNodeStateScoring);
+ }
+
+ // Init by copy and adding merged node code points.
+ void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount,
+ const int *const mergedNodeCodePoints) {
+ initByCopy(src);  // Copy everything first; the merged code points are added to the output.
+ mDicNodeStateOutput.addMergedNodeCodePoints(
+ mergedNodeCodePointCount, mergedNodeCodePoints);
+ }
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_STATE_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_input.h b/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_input.h
new file mode 100644
index 0000000..801f287
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_input.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_STATE_INPUT_H
+#define LATINIME_DIC_NODE_STATE_INPUT_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+// TODO: Have a .cpp for this class. Holds per-pointer input index, previous code point and cost.
+class DicNodeStateInput {
+ public:
+ DicNodeStateInput() {}  // Leaves the arrays uninitialized; init() or initByCopy() fills them.
+ ~DicNodeStateInput() {}
+
+ void init() {  // Resets every pointer slot to its initial state.
+ for (int i = 0; i < MAX_POINTER_COUNT_G; i++) {
+ // TODO: The initial value for mInputIndex should be -1?
+ //mInputIndex[i] = i == 0 ? 0 : -1;
+ mInputIndex[i] = 0;
+ mPrevCodePoint[i] = NOT_A_CODE_POINT;
+ mTerminalDiffCost[i] = static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+ }
+ }
+
+ void init(const DicNodeStateInput *const src, const bool resetTerminalDiffCost) {
+ for (int i = 0; i < MAX_POINTER_COUNT_G; i++) {
+ mInputIndex[i] = src->mInputIndex[i];
+ mPrevCodePoint[i] = src->mPrevCodePoint[i];
+ mTerminalDiffCost[i] = resetTerminalDiffCost ?  // Reset to the maximum, or copy from src.
+ static_cast<float>(MAX_VALUE_FOR_WEIGHTING) : src->mTerminalDiffCost[i];
+ }
+ }
+
+ void updateInputIndexG(const int pointerId, const int inputIndex,
+ const int prevCodePoint, const float terminalDiffCost, const float rawLength) {
+ mInputIndex[pointerId] = inputIndex;  // pointerId is used as an index without a range check.
+ mPrevCodePoint[pointerId] = prevCodePoint;
+ mTerminalDiffCost[pointerId] = terminalDiffCost;
+ }  // rawLength is accepted but not used in this body.
+
+ void initByCopy(const DicNodeStateInput *const src) {
+ init(src, false);  // Plain copy: the terminal diff cost is kept.
+ }
+
+ // For transposition
+ void setPrevCodePoint(const int pointerId, const int c) {
+ mPrevCodePoint[pointerId] = c;
+ }
+
+ void forwardInputIndex(const int pointerId, const int val) {
+ if (mInputIndex[pointerId] < 0) {  // A negative index is replaced by val, not advanced.
+ mInputIndex[pointerId] = val;
+ } else {
+ mInputIndex[pointerId] = mInputIndex[pointerId] + val;
+ }
+ }
+
+ int getInputIndex(const int pointerId) const {
+ // When "inputIndex" exceeds "inputSize", auto-completion needs to be done.
+ return mInputIndex[pointerId];
+ }
+
+ int getPrevCodePoint(const int pointerId) const {
+ return mPrevCodePoint[pointerId];
+ }
+
+ float getTerminalDiffCost(const int pointerId) const {
+ return mTerminalDiffCost[pointerId];
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DicNodeStateInput);
+
+ int mInputIndex[MAX_POINTER_COUNT_G];  // All three arrays are indexed by pointer id.
+ int mPrevCodePoint[MAX_POINTER_COUNT_G];
+ float mTerminalDiffCost[MAX_POINTER_COUNT_G];
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_STATE_INPUT_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_output.h b/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_output.h
new file mode 100644
index 0000000..b7bf7c1
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_output.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_STATE_OUTPUT_H
+#define LATINIME_DIC_NODE_STATE_OUTPUT_H
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring> // for memmove()
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+// Holds the code points that are to be output. The buffer can also contain previous words when
+// the suggestion is a multi-word suggestion; words are then separated by KEYCODE_SPACE.
+class DicNodeStateOutput {
+ public:
+ DicNodeStateOutput()
+ : mOutputtedCodePointCount(0), mCurrentWordStart(0), mPrevWordCount(0),
+ mPrevWordsLength(0), mPrevWordStart(0), mSecondWordFirstInputIndex(NOT_AN_INDEX) {}
+
+ ~DicNodeStateOutput() {}
+
+ // Init for root: empties the output and clears all previous-word bookkeeping.
+ void init() {
+ mOutputtedCodePointCount = 0;
+ mCurrentWordStart = 0;
+ mOutputCodePoints[0] = 0;
+ mPrevWordCount = 0;
+ mPrevWordsLength = 0;
+ mPrevWordStart = 0;
+ mSecondWordFirstInputIndex = NOT_AN_INDEX;
+ }
+ // Init for next word: copies stateOutput's code points, appends KEYCODE_SPACE, starts a new word.
+ // NOTE(review): the KEYCODE_SPACE write is not checked against MAX_WORD_LENGTH - confirm callers.
+ void init(const DicNodeStateOutput *const stateOutput) {
+ mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount + 1;
+ memmove(mOutputCodePoints, stateOutput->mOutputCodePoints,
+ stateOutput->mOutputtedCodePointCount * sizeof(mOutputCodePoints[0]));
+ mOutputCodePoints[stateOutput->mOutputtedCodePointCount] = KEYCODE_SPACE;
+ mCurrentWordStart = stateOutput->mOutputtedCodePointCount + 1;
+ mPrevWordCount = std::min(static_cast<int16_t>(stateOutput->mPrevWordCount + 1),
+ static_cast<int16_t>(MAX_RESULTS)); // the previous word count is capped at MAX_RESULTS
+ mPrevWordsLength = stateOutput->mOutputtedCodePointCount + 1;
+ mPrevWordStart = stateOutput->mCurrentWordStart;
+ mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex;
+ }
+ // Copies stateOutput as is; a 0 terminator is written only when the buffer is not full.
+ void initByCopy(const DicNodeStateOutput *const stateOutput) {
+ memmove(mOutputCodePoints, stateOutput->mOutputCodePoints,
+ stateOutput->mOutputtedCodePointCount * sizeof(mOutputCodePoints[0]));
+ mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount;
+ if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
+ mOutputCodePoints[mOutputtedCodePointCount] = 0;
+ }
+ mCurrentWordStart = stateOutput->mCurrentWordStart;
+ mPrevWordCount = stateOutput->mPrevWordCount;
+ mPrevWordsLength = stateOutput->mPrevWordsLength;
+ mPrevWordStart = stateOutput->mPrevWordStart;
+ mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex;
+ }
+ // Appends the merged code points, truncated to the room left in the buffer; null input is a no-op.
+ void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount,
+ const int *const mergedNodeCodePoints) {
+ if (mergedNodeCodePoints) {
+ const int additionalCodePointCount = std::min(
+ static_cast<int>(mergedNodeCodePointCount),
+ MAX_WORD_LENGTH - mOutputtedCodePointCount); // never copy past the end of the buffer
+ memmove(&mOutputCodePoints[mOutputtedCodePointCount], mergedNodeCodePoints,
+ additionalCodePointCount * sizeof(mOutputCodePoints[0]));
+ mOutputtedCodePointCount = static_cast<uint16_t>(
+ mOutputtedCodePointCount + additionalCodePointCount);
+ if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
+ mOutputCodePoints[mOutputtedCodePointCount] = 0; // terminate only if a slot is still free
+ }
+ }
+ }
+
+ int getCurrentWordCodePointAt(const int index) const { // index is relative to mCurrentWordStart
+ return mOutputCodePoints[mCurrentWordStart + index];
+ }
+
+ const int *getCodePointBuf() const { // whole buffer, including any previous words
+ return mOutputCodePoints;
+ }
+
+ void setSecondWordFirstInputIndex(const int inputIndex) {
+ mSecondWordFirstInputIndex = inputIndex;
+ }
+
+ int getSecondWordFirstInputIndex() const {
+ return mSecondWordFirstInputIndex;
+ }
+
+ // TODO: remove
+ int16_t getPrevWordsLength() const {
+ return mPrevWordsLength;
+ }
+
+ int16_t getPrevWordCount() const {
+ return mPrevWordCount;
+ }
+
+ int16_t getPrevWordStart() const {
+ return mPrevWordStart;
+ }
+
+ int getOutputCodePointAt(const int id) const { // id is an absolute index into the buffer
+ return mOutputCodePoints[id];
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DicNodeStateOutput);
+
+ // When the DicNode represents "this is a pen":
+ // mOutputtedCodePointCount is 13, which is total code point count of "this is a pen" including
+ // spaces.
+ // mCurrentWordStart indicates the head of "pen", thus it is 10.
+ // This contains 3 previous words, "this", "is" and "a"; thus, mPrevWordCount is 3.
+ // mPrevWordsLength is length of "this is a ", which is 10.
+ // mPrevWordStart is the start index of "a"; thus, it is 8.
+ // mSecondWordFirstInputIndex is the first input index of "is".
+
+ uint16_t mOutputtedCodePointCount;
+ int mOutputCodePoints[MAX_WORD_LENGTH];
+ int16_t mCurrentWordStart;
+ // Previous word count in mOutputCodePoints.
+ int16_t mPrevWordCount;
+ // Total length of previous words in mOutputCodePoints. This is being used by the algorithm
+ // that may want to look at the previous word information.
+ int16_t mPrevWordsLength;
+ // Start index of the previous word in mOutputCodePoints. This is being used for auto commit.
+ int16_t mPrevWordStart;
+ int mSecondWordFirstInputIndex;
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H
diff --git a/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_scoring.h b/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_scoring.h
new file mode 100644
index 0000000..961ee58
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dicnode/internal/dic_node_state_scoring.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_STATE_SCORING_H
+#define LATINIME_DIC_NODE_STATE_SCORING_H
+
+#include <algorithm>
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/digraph_utils.h"
+#include "third_party/android_prediction/suggest/core/dictionary/error_type_utils.h"
+
+namespace latinime {
+
+class DicNodeStateScoring { // Accumulates distances, correction counts and error types for scoring.
+ public:
+ AK_FORCE_INLINE DicNodeStateScoring()
+ : mDoubleLetterLevel(NOT_A_DOUBLE_LETTER),
+ mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
+ mEditCorrectionCount(0), mProximityCorrectionCount(0), mCompletionCount(0),
+ mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
+ mRawLength(0.0f), mContainedErrorTypes(ErrorTypeUtils::NOT_AN_ERROR),
+ mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) {
+ }
+
+ ~DicNodeStateScoring() {}
+ // Resets every field to the same value the constructor assigns.
+ void init() {
+ mEditCorrectionCount = 0;
+ mProximityCorrectionCount = 0;
+ mCompletionCount = 0;
+ mNormalizedCompoundDistance = 0.0f;
+ mSpatialDistance = 0.0f;
+ mLanguageDistance = 0.0f;
+ mRawLength = 0.0f;
+ mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
+ mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
+ mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING;
+ mContainedErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
+ }
+ // Copies every field from scoring.
+ AK_FORCE_INLINE void initByCopy(const DicNodeStateScoring *const scoring) {
+ mEditCorrectionCount = scoring->mEditCorrectionCount;
+ mProximityCorrectionCount = scoring->mProximityCorrectionCount;
+ mCompletionCount = scoring->mCompletionCount;
+ mNormalizedCompoundDistance = scoring->mNormalizedCompoundDistance;
+ mSpatialDistance = scoring->mSpatialDistance;
+ mLanguageDistance = scoring->mLanguageDistance;
+ mRawLength = scoring->mRawLength;
+ mDoubleLetterLevel = scoring->mDoubleLetterLevel;
+ mDigraphIndex = scoring->mDigraphIndex;
+ mContainedErrorTypes = scoring->mContainedErrorTypes;
+ mNormalizedCompoundDistanceAfterFirstWord =
+ scoring->mNormalizedCompoundDistanceAfterFirstWord;
+ }
+ // Adds both costs, merges errorType into the contained error types and bumps matching counters.
+ void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
+ const int inputSize, const int totalInputIndex,
+ const ErrorTypeUtils::ErrorType errorType) {
+ addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex);
+ mContainedErrorTypes = mContainedErrorTypes | errorType;
+ if (ErrorTypeUtils::isEditCorrectionError(errorType)) {
+ ++mEditCorrectionCount;
+ }
+ if (ErrorTypeUtils::isProximityCorrectionError(errorType)) {
+ ++mProximityCorrectionCount;
+ }
+ if (ErrorTypeUtils::isCompletion(errorType)) {
+ ++mCompletionCount;
+ }
+ }
+
+ // Saves the current normalized distance for space-aware gestures.
+ // See getNormalizedCompoundDistanceAfterFirstWord for details.
+ void saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet() {
+ // We get called here after each word. We only want to store the distance after
+ // the first word, so if we already have a distance we skip saving -- hence "IfNoneYet"
+ // in the method name.
+ if (mNormalizedCompoundDistanceAfterFirstWord >= MAX_VALUE_FOR_WEIGHTING) {
+ mNormalizedCompoundDistanceAfterFirstWord = getNormalizedCompoundDistance();
+ }
+ }
+
+ void addRawLength(const float rawLength) {
+ mRawLength += rawLength;
+ }
+ // Compound distance with a language weight of 1.
+ float getCompoundDistance() const {
+ return getCompoundDistance(1.0f);
+ }
+ // Spatial distance plus the language distance scaled by languageWeight.
+ float getCompoundDistance(const float languageWeight) const {
+ return mSpatialDistance + mLanguageDistance * languageWeight;
+ }
+
+ float getNormalizedCompoundDistance() const {
+ return mNormalizedCompoundDistance;
+ }
+
+ // For space-aware gestures, we store the normalized distance at the char index
+ // that ends the first word of the suggestion. We call this the distance after
+ // first word.
+ float getNormalizedCompoundDistanceAfterFirstWord() const {
+ return mNormalizedCompoundDistanceAfterFirstWord;
+ }
+
+ float getSpatialDistance() const {
+ return mSpatialDistance;
+ }
+
+ float getLanguageDistance() const {
+ return mLanguageDistance;
+ }
+
+ int16_t getEditCorrectionCount() const {
+ return mEditCorrectionCount;
+ }
+
+ int16_t getProximityCorrectionCount() const {
+ return mProximityCorrectionCount;
+ }
+
+ int16_t getCompletionCount() const {
+ return mCompletionCount;
+ }
+
+ float getRawLength() const {
+ return mRawLength;
+ }
+
+ DoubleLetterLevel getDoubleLetterLevel() const {
+ return mDoubleLetterLevel;
+ }
+ // Only raises the level: NOT_A_DOUBLE_LETTER is ignored, A_STRONG_DOUBLE_LETTER is kept once set.
+ void setDoubleLetterLevel(DoubleLetterLevel doubleLetterLevel) {
+ switch(doubleLetterLevel) {
+ case NOT_A_DOUBLE_LETTER:
+ break;
+ case A_DOUBLE_LETTER:
+ if (mDoubleLetterLevel != A_STRONG_DOUBLE_LETTER) {
+ mDoubleLetterLevel = doubleLetterLevel;
+ }
+ break;
+ case A_STRONG_DOUBLE_LETTER:
+ mDoubleLetterLevel = doubleLetterLevel;
+ break;
+ }
+ }
+
+ DigraphUtils::DigraphCodePointIndex getDigraphIndex() const {
+ return mDigraphIndex;
+ }
+ // Cycles NOT_A_DIGRAPH_INDEX -> FIRST_DIGRAPH_CODEPOINT -> SECOND_DIGRAPH_CODEPOINT -> back.
+ void advanceDigraphIndex() {
+ switch(mDigraphIndex) {
+ case DigraphUtils::NOT_A_DIGRAPH_INDEX:
+ mDigraphIndex = DigraphUtils::FIRST_DIGRAPH_CODEPOINT;
+ break;
+ case DigraphUtils::FIRST_DIGRAPH_CODEPOINT:
+ mDigraphIndex = DigraphUtils::SECOND_DIGRAPH_CODEPOINT;
+ break;
+ case DigraphUtils::SECOND_DIGRAPH_CODEPOINT:
+ mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
+ break;
+ }
+ }
+
+ ErrorTypeUtils::ErrorType getContainedErrorTypes() const {
+ return mContainedErrorTypes;
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DicNodeStateScoring);
+
+ DoubleLetterLevel mDoubleLetterLevel;
+ DigraphUtils::DigraphCodePointIndex mDigraphIndex;
+
+ int16_t mEditCorrectionCount;
+ int16_t mProximityCorrectionCount;
+ int16_t mCompletionCount;
+
+ float mNormalizedCompoundDistance;
+ float mSpatialDistance;
+ float mLanguageDistance;
+ float mRawLength;
+ // All accumulated error types so far
+ ErrorTypeUtils::ErrorType mContainedErrorTypes;
+ float mNormalizedCompoundDistanceAfterFirstWord;
+ // Accumulates both distances and refreshes the normalized value; inputSize is not used here.
+ AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
+ bool doNormalization, int inputSize, int totalInputIndex) {
+ mSpatialDistance += spatialDistance;
+ mLanguageDistance += languageDistance;
+ if (!doNormalization) {
+ mNormalizedCompoundDistance = mSpatialDistance + mLanguageDistance;
+ } else {
+ mNormalizedCompoundDistance = (mSpatialDistance + mLanguageDistance)
+ / static_cast<float>(std::max(1, totalInputIndex)); // divisor is clamped to at least 1
+ }
+ }
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_STATE_SCORING_H
diff --git a/third_party/android_prediction/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/third_party/android_prediction/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
new file mode 100644
index 0000000..b59f711
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
+#define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_bigrams_structure_policy.h"
+
+namespace latinime {
+
+class BinaryDictionaryBigramsIterator {
+ public:
+    // Empty iterator: null policy, no position, and hasNext() is false from the start.
+    BinaryDictionaryBigramsIterator()
+            : BinaryDictionaryBigramsIterator(nullptr /* bigramsStructurePolicy */,
+                      NOT_A_DICT_POS /* pos */) {}
+
+    // Iterator starting at pos; hasNext() is initially true unless pos is NOT_A_DICT_POS.
+    BinaryDictionaryBigramsIterator(
+            const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos)
+            : mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos),
+              mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
+              mHasNext(NOT_A_DICT_POS != pos) {}
+
+    // Move constructor: takes over the field values of other, which is not modified.
+    BinaryDictionaryBigramsIterator(BinaryDictionaryBigramsIterator &&other)
+            : mBigramsStructurePolicy(other.mBigramsStructurePolicy), mPos(other.mPos),
+              mBigramPos(other.mBigramPos), mProbability(other.mProbability),
+              mHasNext(other.mHasNext) {}
+
+    // Whether another entry is available; the flag is handed to the policy on every next().
+    AK_FORCE_INLINE bool hasNext() const { return mHasNext; }
+
+    // Asks the policy for the next bigram, passing all four state fields as out-parameters.
+    AK_FORCE_INLINE void next() {
+        mBigramsStructurePolicy->getNextBigram(&mBigramPos, &mProbability, &mHasNext, &mPos);
+    }
+
+    // Probability field; NOT_A_PROBABILITY until next() has been called.
+    AK_FORCE_INLINE int getProbability() const { return mProbability; }
+
+    // Bigram position field; NOT_A_DICT_POS until next() has been called.
+    AK_FORCE_INLINE int getBigramPos() const { return mBigramPos; }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator);
+
+    const DictionaryBigramsStructurePolicy *const mBigramsStructurePolicy;
+    int mPos;
+    int mBigramPos;
+    int mProbability;
+    bool mHasNext;
+};
+} // namespace latinime
+#endif // LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
diff --git a/third_party/android_prediction/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h b/third_party/android_prediction/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h
new file mode 100644
index 0000000..0fe0ab0
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H
+#define LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+
+namespace latinime {
+
+class BinaryDictionaryShortcutIterator {
+ public:
+    // Starts at the position the policy returns for shortcutPos. A first target is reported as
+    // available unless shortcutPos is NOT_A_DICT_POS.
+    BinaryDictionaryShortcutIterator(
+            const DictionaryShortcutsStructurePolicy *const shortcutStructurePolicy,
+            const int shortcutPos)
+            : mShortcutStructurePolicy(shortcutStructurePolicy),
+              mPos(shortcutStructurePolicy->getStartPos(shortcutPos)),
+              mHasNextShortcutTarget(NOT_A_DICT_POS != shortcutPos) {}
+
+    AK_FORCE_INLINE bool hasNextShortcutTarget() const { return mHasNextShortcutTarget; }
+
+    // Reads the next shortcut target through the policy. The target is put to outTarget as an
+    // int string, its length to outTargetLength, and whether it is whitelist to outIsWhitelist.
+    // The iterator's position and has-next flag are passed along for the policy to update.
+    AK_FORCE_INLINE void nextShortcutTarget(const int maxDepth, int *const outTarget,
+            int *const outTargetLength, bool *const outIsWhitelist) {
+        mShortcutStructurePolicy->getNextShortcut(maxDepth, outTarget, outTargetLength,
+                outIsWhitelist, &mHasNextShortcutTarget, &mPos);
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryShortcutIterator);
+
+    const DictionaryShortcutsStructurePolicy *const mShortcutStructurePolicy;
+    int mPos;
+    bool mHasNextShortcutTarget;
+};
+} // namespace latinime
+#endif // LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H
diff --git a/third_party/android_prediction/suggest/core/dictionary/bloom_filter.h b/third_party/android_prediction/suggest/core/dictionary/bloom_filter.h
new file mode 100644
index 0000000..23b06c6
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/bloom_filter.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BLOOM_FILTER_H
+#define LATINIME_BLOOM_FILTER_H
+
+#include <bitset>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+// This bloom filter is used for optimizing bigram retrieval.
+// Execution times with previous word "this" are as follows:
+//  without bloom filter (use only hash_map):
+//   Total 147792.34 (sum of others 147771.57)
+//  with bloom filter:
+//   Total 145900.64 (sum of others 145874.30)
+//  always read binary dictionary:
+//   Total 148603.14 (sum of others 148579.90)
+class BloomFilter {
+ public:
+    // Starts with every bucket cleared.
+    BloomFilter() : mFilter() {}
+
+    // Marks the bucket that position maps to (position modulo BIGRAM_FILTER_MODULO).
+    AK_FORCE_INLINE void setInFilter(const int position) {
+        mFilter.set(static_cast<size_t>(position) % BIGRAM_FILTER_MODULO);
+    }
+
+    // Tests the bucket that position maps to. Different positions can share a bucket, so a
+    // true result may be a false positive; this class never clears a bucket once it is set.
+    AK_FORCE_INLINE bool isInFilter(const int position) const {
+        return mFilter.test(static_cast<size_t>(position) % BIGRAM_FILTER_MODULO);
+    }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter);
+
+    // Size, in bits, of the bloom filter index for bigrams
+    // The probability of false positive is (1 - e ** (-kn/m))**k,
+    // where k is the number of hash functions, n the number of bigrams, and m the number of
+    // bits we can test.
+    // At the moment 100 is the maximum number of bigrams for a word with the current main
+    // dictionaries, so n = 100. 1024 buckets give us m = 1024.
+    // With 1 hash function, our false positive rate is about 9.3%, which should be enough for
+    // our uses since we are only using this to increase average performance. For the record,
+    // k = 2 gives 3.1% and k = 3 gives 1.6%. With k = 1, making m = 2048 gives 4.8%,
+    // and m = 4096 gives 2.4%.
+    // This is assigned here because it is used for bitset size.
+    // 1021 is the largest prime under 1024.
+    static const size_t BIGRAM_FILTER_MODULO = 1021;
+    std::bitset<BIGRAM_FILTER_MODULO> mFilter;
+};
+} // namespace latinime
+#endif // LATINIME_BLOOM_FILTER_H
diff --git a/third_party/android_prediction/suggest/core/dictionary/dictionary.cpp b/third_party/android_prediction/suggest/core/dictionary/dictionary.cpp
new file mode 100644
index 0000000..92b2909
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/dictionary.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2009, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "LatinIME: dictionary.cpp"
+
+#include "third_party/android_prediction/suggest/core/dictionary/dictionary.h"
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/dictionary_utils.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h"
+#include "third_party/android_prediction/suggest/core/result/suggestion_results.h"
+#include "third_party/android_prediction/suggest/core/session/dic_traverse_session.h"
+#include "third_party/android_prediction/suggest/core/session/prev_words_info.h"
+#include "third_party/android_prediction/suggest/core/suggest.h"
+#include "third_party/android_prediction/suggest/core/suggest_options.h"
+#include "third_party/android_prediction/suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
+#include "third_party/android_prediction/suggest/policyimpl/typing/typing_suggest_policy_factory.h"
+#include "third_party/android_prediction/utils/time_keeper.h"
+
+namespace latinime {
+
+const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32; // out-of-class definition of the static member
+
+// Moves the structure policy into the dictionary and creates the gesture and typing engines.
+Dictionary::Dictionary(DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy)
+        : mDictionaryStructureWithBufferPolicy(std::move(structurePolicy)),
+          mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
+          mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
+}
+
+// Runs one suggestion pass: initializes the traverse session for this dictionary, then delegates
+// to the gesture or the typing engine depending on suggestOptions->isGesture(). The results are
+// dumped afterwards when DEBUG_DICT is enabled.
+void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
+        int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
+        int inputSize, const PrevWordsInfo *const prevWordsInfo,
+        const SuggestOptions *const suggestOptions, const float languageWeight,
+        SuggestionResults *const outSuggestionResults) const {
+    TimeKeeper::setCurrentTime();
+    traverseSession->init(this, prevWordsInfo, suggestOptions);
+    const auto &engine = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
+    engine->getSuggestions(proximityInfo, traverseSession, xcoordinates, ycoordinates, times,
+            pointerIds, inputCodePoints, inputSize, languageWeight, outSuggestionResults);
+    if (DEBUG_DICT) outSuggestionResults->dumpSuggestions();
+}
+
+Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
+        const PrevWordsInfo *const prevWords, SuggestionResults *const results,
+        const DictionaryStructureWithBufferPolicy *const policy)
+        // Only stores the three pointers; onVisitEntry() reads them later.
+        : mPrevWordsInfo(prevWords), mSuggestionResults(results), mDictStructurePolicy(policy) {}
+
+// Turns one visited n-gram entry into a prediction. The entry is skipped when the target is not
+// a dictionary position, when the previous word is beginning-of-sentence and the entry has no
+// n-gram probability, or when no code points could be read for the target.
+void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
+        const int targetPtNodePos) {
+    if (NOT_A_DICT_POS == targetPtNodePos) {
+        return;
+    }
+    const bool lacksNgramProbability = (NOT_A_PROBABILITY == ngramProbability);
+    if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */) && lacksNgramProbability) {
+        return;
+    }
+    int wordCodePoints[MAX_WORD_LENGTH];
+    int unigramProbability = 0;
+    const int count = mDictStructurePolicy->getCodePointsAndProbabilityAndReturnCodePointCount(
+            targetPtNodePos, MAX_WORD_LENGTH, wordCodePoints, &unigramProbability);
+    if (count <= 0) return;
+    // The policy decides how the unigram and n-gram probabilities are combined.
+    mSuggestionResults->addPrediction(wordCodePoints, count,
+            mDictStructurePolicy->getProbability(unigramProbability, ngramProbability));
+}
+
+// Resolves the previous words to PtNode positions and feeds every n-gram entry to a listener.
+void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
+        SuggestionResults *const outSuggestionResults) const {
+    TimeKeeper::setCurrentTime();
+    auto *const policy = mDictionaryStructureWithBufferPolicy.get();
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    prevWordsInfo->getPrevWordsTerminalPtNodePos(policy, prevWordsPtNodePos,
+            true /* tryLowerCaseSearch */);
+    NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults, policy);
+    policy->iterateNgramEntries(prevWordsPtNodePos, &listener);
+}
+
+int Dictionary::getProbability(const int *word, int length) const { // lookup without previous words
+ return getNgramProbability(nullptr /* prevWordsInfo */, word, length); // null means no context
+}
+
+int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
+ TimeKeeper::setCurrentTime(); // update TimeKeeper's current time before querying
+ return DictionaryUtils::getMaxProbabilityOfExactMatches( // the work is done by DictionaryUtils
+ mDictionaryStructureWithBufferPolicy.get(), word, length);
+}
+
+// Probability of word, given prevWordsInfo if non-null; NOT_A_PROBABILITY if word is not found.
+int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
+        int length) const {
+    TimeKeeper::setCurrentTime();
+    const int wordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(
+            word, length, false /* forceLowerCaseSearch */);
+    if (wordPos == NOT_A_DICT_POS) return NOT_A_PROBABILITY;
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    int *contextPositions = nullptr;  // stays null when there is no previous-word context
+    if (prevWordsInfo) {
+        prevWordsInfo->getPrevWordsTerminalPtNodePos(mDictionaryStructureWithBufferPolicy.get(),
+                prevWordsPtNodePos, true /* tryLowerCaseSearch */);
+        contextPositions = prevWordsPtNodePos;
+    }
+    return getDictionaryStructurePolicy()->getProbabilityOfPtNode(contextPositions, wordPos);
+}
+
+bool Dictionary::addUnigramEntry(const int *const word, const int length,
+        const UnigramProperty *const unigramProperty) {
+    if (!unigramProperty->representsBeginningOfSentence()
+            || mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
+                    ->supportsBeginningOfSentence()) {
+        TimeKeeper::setCurrentTime();
+        return mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
+    }  // otherwise a Beginning-of-Sentence entry was requested but the header lacks support
+    AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
+    return false;
+}
+
+bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
+ TimeKeeper::setCurrentTime(); // update TimeKeeper's current time before modifying
+ return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints, codePointCount); // forwards to the policy and returns its result
+}
+
+bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const BigramProperty *const bigramProperty) {
+ TimeKeeper::setCurrentTime(); // update TimeKeeper's current time before modifying
+ return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty); // forwards to the policy and returns its result
+}
+
+bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const int *const word, const int length) {
+ TimeKeeper::setCurrentTime(); // update TimeKeeper's current time before modifying
+ return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length); // forwards to the policy and returns its result
+}
+
+bool Dictionary::flush(const char *const filePath) { // thin wrapper around the policy's flush()
+ TimeKeeper::setCurrentTime(); // update TimeKeeper's current time first
+ return mDictionaryStructureWithBufferPolicy->flush(filePath); // result comes from the policy
+}
+
+bool Dictionary::flushWithGC(const char *const filePath) { // thin wrapper around flushWithGC()
+ TimeKeeper::setCurrentTime(); // update TimeKeeper's current time first
+ return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); // result comes from the policy
+}
+
+bool Dictionary::needsToRunGC(const bool mindsBlockByGC) { // thin wrapper around needsToRunGC()
+ TimeKeeper::setCurrentTime(); // update TimeKeeper's current time first
+ return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC); // policy decides
+}
+
+void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult,
+ const int maxResultLength) {
+ TimeKeeper::setCurrentTime(); // update TimeKeeper's current time first
+ return mDictionaryStructureWithBufferPolicy->getProperty(query, queryLength, outResult, // void call: all arguments are forwarded to the policy as is
+ maxResultLength);
+}
+
+const WordProperty Dictionary::getWordProperty(const int *const codePoints,
+ const int codePointCount) {
+ TimeKeeper::setCurrentTime(); // update TimeKeeper's current time first
+ return mDictionaryStructureWithBufferPolicy->getWordProperty( // forwards to the policy
+ codePoints, codePointCount);
+}
+
+int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) {
+ TimeKeeper::setCurrentTime(); // update TimeKeeper's current time first
+ return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken( // forwards to the policy
+ token, outCodePoints, outCodePointCount);
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/dictionary/dictionary.h b/third_party/android_prediction/suggest/core/dictionary/dictionary.h
new file mode 100644
index 0000000..5b143f5
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/dictionary.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_H
+#define LATINIME_DICTIONARY_H
+
+#include <memory>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/ngram_listener.h"
+#include "third_party/android_prediction/suggest/core/dictionary/property/word_property.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "third_party/android_prediction/suggest/core/suggest_interface.h"
+
+namespace latinime {
+
+class DictionaryStructureWithBufferPolicy;
+class DicTraverseSession;
+class PrevWordsInfo;
+class ProximityInfo;
+class SuggestionResults;
+class SuggestOptions;
+
+class Dictionary { // Holds one structure policy plus two SuggestInterface instances (gesture and typing).
+ public:
+ // Taken from SuggestedWords.java
+ static const int KIND_MASK_KIND = 0xFF; // Mask to get only the kind
+ static const int KIND_TYPED = 0; // What user typed
+ static const int KIND_CORRECTION = 1; // Simple correction/suggestion
+ static const int KIND_COMPLETION = 2; // Completion (suggestion with appended chars)
+ static const int KIND_WHITELIST = 3; // Whitelisted word
+ static const int KIND_BLACKLIST = 4; // Blacklisted word
+ static const int KIND_HARDCODED = 5; // Hardcoded suggestion, e.g. punctuation
+ static const int KIND_APP_DEFINED = 6; // Suggested by the application
+ static const int KIND_SHORTCUT = 7; // A shortcut
+ static const int KIND_PREDICTION = 8; // A prediction (== a suggestion with no input)
+ // KIND_RESUMED: A resumed suggestion (comes from a span, currently this type is used only
+ // in java for re-correction)
+ static const int KIND_RESUMED = 9;
+ static const int KIND_OOV_CORRECTION = 10; // Most probable string correction
+
+ static const int KIND_MASK_FLAGS = 0xFFFFFF00; // Mask to get the flags (every bit above the low 8 kind bits)
+ static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000; // NOTE(review): does not fit a 32-bit signed int; relies on the implicit conversion.
+ static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
+ static const int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000;
+
+ Dictionary(DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ dictionaryStructureWithBufferPolicy);
+
+ void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
+ int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
+ int inputSize, const PrevWordsInfo *const prevWordsInfo,
+ const SuggestOptions *const suggestOptions, const float languageWeight,
+ SuggestionResults *const outSuggestionResults) const;
+
+ void getPredictions(const PrevWordsInfo *const prevWordsInfo,
+ SuggestionResults *const outSuggestionResults) const;
+
+ int getProbability(const int *word, int length) const;
+
+ int getMaxProbabilityOfExactMatches(const int *word, int length) const;
+
+ int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
+ const int *word, int length) const;
+
+ bool addUnigramEntry(const int *const codePoints, const int codePointCount,
+ const UnigramProperty *const unigramProperty);
+
+ bool removeUnigramEntry(const int *const codePoints, const int codePointCount);
+ // In dictionary.cpp, addNgramEntry() through getNextWordAndNextToken() set TimeKeeper's time, then forward to the policy.
+ bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const BigramProperty *const bigramProperty);
+
+ bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
+ const int length);
+
+ bool flush(const char *const filePath);
+
+ bool flushWithGC(const char *const filePath);
+
+ bool needsToRunGC(const bool mindsBlockByGC);
+
+ void getProperty(const char *const query, const int queryLength, char *const outResult,
+ const int maxResultLength);
+
+ const WordProperty getWordProperty(const int *const codePoints, const int codePointCount);
+
+ // Method to iterate all words in the dictionary.
+ // The returned token has to be used to get the next word. If token is 0, this method newly
+ // starts iterating the dictionary.
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
+
+ const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
+ return mDictionaryStructureWithBufferPolicy.get(); // Raw pointer obtained from the StructurePolicyPtr member via get().
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
+
+ typedef std::unique_ptr<SuggestInterface> SuggestInterfacePtr;
+
+ class NgramListenerForPrediction : public NgramListener { // Listener that carries the prediction context in its three members.
+ public:
+ NgramListenerForPrediction(const PrevWordsInfo *const prevWordsInfo,
+ SuggestionResults *const suggestionResults,
+ const DictionaryStructureWithBufferPolicy *const dictStructurePolicy);
+ virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction);
+
+ const PrevWordsInfo *const mPrevWordsInfo;
+ SuggestionResults *const mSuggestionResults;
+ const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy;
+ };
+
+ static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
+
+ const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ mDictionaryStructureWithBufferPolicy;
+ const SuggestInterfacePtr mGestureSuggest;
+ const SuggestInterfacePtr mTypingSuggest;
+};
+} // namespace latinime
+#endif // LATINIME_DICTIONARY_H
diff --git a/third_party/android_prediction/suggest/core/dictionary/dictionary_utils.cpp b/third_party/android_prediction/suggest/core/dictionary/dictionary_utils.cpp
new file mode 100644
index 0000000..703997a
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/dictionary_utils.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/dictionary/dictionary_utils.h"
+
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_priority_queue.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_vector.h"
+#include "third_party/android_prediction/suggest/core/dictionary/dictionary.h"
+#include "third_party/android_prediction/suggest/core/dictionary/digraph_utils.h"
+#include "third_party/android_prediction/suggest/core/session/prev_words_info.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+
+namespace latinime {
+
+// Returns the max probability of terminal nodes matching codePoints, or NOT_A_PROBABILITY.
+/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches(
+        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+        const int *const codePoints, const int codePointCount) {
+    std::vector<DicNode> current;  // Nodes that match the input consumed so far.
+    std::vector<DicNode> next;  // Nodes that still match after one more code point.
+
+    // No prev words information.
+    PrevWordsInfo emptyPrevWordsInfo;
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    emptyPrevWordsInfo.getPrevWordsTerminalPtNodePos(dictionaryStructurePolicy,
+            prevWordsPtNodePos, false /* tryLowerCaseSearch */);
+    current.emplace_back();
+    DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordsPtNodePos, &current.front());
+    for (int i = 0; i < codePointCount; ++i) {
+        // The base-lower input is used to ignore case errors and accent errors.
+        const int codePoint = CharUtils::toBaseLowerCase(codePoints[i]);
+        for (const DicNode &dicNode : current) {
+            if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == codePoint) {
+                next.emplace_back(dicNode);
+                next.back().advanceDigraphIndex();
+                continue;
+            }
+            processChildDicNodes(dictionaryStructurePolicy, codePoint, &dicNode, &next);
+        }
+        current.clear();
+        current.swap(next);  // Leaves `next` empty for the following code point.
+    }
+
+    int maxProbability = NOT_A_PROBABILITY;
+    for (const DicNode &dicNode : current) {
+        // dicNode can contain case errors, accent errors, intentional omissions or digraphs.
+        if (dicNode.isTerminalDicNode()) {
+            maxProbability = std::max(maxProbability, dicNode.getProbability());
+        }
+    }
+    return maxProbability;
+}
+
+/* static */ void DictionaryUtils::processChildDicNodes(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int inputCodePoint, const DicNode *const parentDicNode,
+ std::vector<DicNode> *const outDicNodes) { // outDicNodes: matching nodes are appended; nothing is removed.
+ DicNodeVector childDicNodes;
+ DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes);
+ for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) {
+ DicNode *const childDicNode = childDicNodes[childIndex];
+ const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint()); // Compare in base-lower-case form.
+ if (inputCodePoint == codePoint) {
+ outDicNodes->emplace_back(*childDicNode); // Copy taken before any digraph index change below.
+ }
+ if (childDicNode->canBeIntentionalOmission()) {
+ processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode, // Recurse so this child's own children can match the same input.
+ outDicNodes);
+ }
+ if (DigraphUtils::hasDigraphForCodePoint(
+ dictionaryStructurePolicy->getHeaderStructurePolicy(),
+ childDicNode->getNodeCodePoint())) {
+ childDicNode->advanceDigraphIndex(); // Mutates the node held in childDicNodes.
+ if (childDicNode->getNodeCodePoint() == codePoint) { // NOTE(review): compares with the child's own codePoint, not inputCodePoint; confirm intended.
+ childDicNode->advanceDigraphIndex();
+ outDicNodes->emplace_back(*childDicNode); // This copy carries the twice-advanced digraph index.
+ }
+ }
+ }
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/dictionary/dictionary_utils.h b/third_party/android_prediction/suggest/core/dictionary/dictionary_utils.h
new file mode 100644
index 0000000..82f82c4
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/dictionary_utils.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_UTILS_H
+#define LATINIME_DICTIONARY_UTILS_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class DictionaryStructureWithBufferPolicy;
+class DicNode;
+
+class DictionaryUtils { // Static-only helpers; both members are static functions.
+ public:
+ static int getMaxProbabilityOfExactMatches( // Max probability over terminal nodes matching codePoints (compared base-lower-cased).
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int *const codePoints, const int codePointCount);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryUtils);
+
+ static void processChildDicNodes( // Appends the children of parentDicNode that match inputCodePoint to outDicNodes.
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int inputCodePoint, const DicNode *const parentDicNode,
+ std::vector<DicNode> *const outDicNodes);
+};
+} // namespace latinime
+#endif // LATINIME_DICTIONARY_UTILS_H
diff --git a/third_party/android_prediction/suggest/core/dictionary/digraph_utils.cpp b/third_party/android_prediction/suggest/core/dictionary/digraph_utils.cpp
new file mode 100644
index 0000000..38a7733
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/digraph_utils.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/dictionary/digraph_utils.h"
+
+#include <cstdlib>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+namespace latinime {
+
+const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] = // Entries are { first, second, compositeGlyph }.
+ { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
+ { 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
+ { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
+const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = // Types that getDigraphForCodePoint() searches, in order.
+ { DIGRAPH_TYPE_GERMAN_UMLAUT };
+
+// Tells whether the dictionary's digraph type has a digraph for the given composite glyph.
+/* static */ bool DigraphUtils::hasDigraphForCodePoint(
+        const DictionaryHeaderStructurePolicy *const headerPolicy,
+        const int compositeGlyphCodePoint) {
+    const DigraphType typeForDictionary = getDigraphTypeForDictionary(headerPolicy);
+    const digraph_t *const match =
+            getDigraphForDigraphTypeAndCodePoint(typeForDictionary, compositeGlyphCodePoint);
+    return match != nullptr;
+}
+
+// Picks the digraph type for a dictionary: German umlaut only when its header requests it.
+/* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary(
+        const DictionaryHeaderStructurePolicy *const headerPolicy) {
+    if (!headerPolicy->requiresGermanUmlautProcessing()) {
+        return DIGRAPH_TYPE_NONE;
+    }
+    return DIGRAPH_TYPE_GERMAN_UMLAUT;
+}
+
+// Returns the selected (first or second) code point of the digraph for a composite glyph.
+/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
+        const DigraphCodePointIndex digraphCodePointIndex) {
+    if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) {
+        return NOT_A_CODE_POINT;
+    }
+    const digraph_t *const entry = getDigraphForCodePoint(compositeGlyphCodePoint);
+    if (entry == nullptr) {
+        return NOT_A_CODE_POINT;  // No used digraph type defines this glyph.
+    }
+    switch (digraphCodePointIndex) {
+        case FIRST_DIGRAPH_CODEPOINT:
+            return entry->first;
+        case SECOND_DIGRAPH_CODEPOINT:
+            return entry->second;
+        default:
+            ASSERT(false);  // Every valid index value is handled above.
+            return NOT_A_CODE_POINT;
+    }
+}
+
+// Points *digraphs at the table for digraphType and returns the number of entries in it.
+// For a type without a table, returns 0 and leaves *digraphs untouched.
+/* static */ int DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(
+        const DigraphUtils::DigraphType digraphType,
+        const DigraphUtils::digraph_t **const digraphs) {
+    if (digraphType != DIGRAPH_TYPE_GERMAN_UMLAUT) {
+        return 0;
+    }
+    *digraphs = GERMAN_UMLAUT_DIGRAPHS;
+    return NELEMS(GERMAN_UMLAUT_DIGRAPHS);
+}
+
+/**
+ * Looks the composite glyph up in every digraph type listed in USED_DIGRAPH_TYPES.
+ * Returns the first digraph found, or nullptr when none of those types defines one.
+ */
+/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint(
+        const int compositeGlyphCodePoint) {
+    for (const DigraphType usedType : USED_DIGRAPH_TYPES) {
+        const digraph_t *const found =
+                getDigraphForDigraphTypeAndCodePoint(usedType, compositeGlyphCodePoint);
+        if (found != nullptr) {
+            return found;
+        }
+    }
+    return nullptr;
+}
+
+/**
+ * Finds, within one digraph type, the entry whose composite glyph equals the input glyph.
+ * digraphType selects the table; compositeGlyphCodePoint is lower-cased before matching.
+ * Returns a pointer to the table entry, or nullptr if there is no match.
+ */
+/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
+        const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
+    const digraph_t *table = nullptr;
+    const int loweredGlyph = CharUtils::toLowerCase(compositeGlyphCodePoint);
+    const int tableSize = getAllDigraphsForDigraphTypeAndReturnSize(digraphType, &table);
+    for (int index = 0; index < tableSize; ++index) {
+        const digraph_t &candidate = table[index];
+        if (candidate.compositeGlyph == loweredGlyph) {
+            return &candidate;
+        }
+    }
+    return nullptr;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/dictionary/digraph_utils.h b/third_party/android_prediction/suggest/core/dictionary/digraph_utils.h
new file mode 100644
index 0000000..7314259
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/digraph_utils.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DIGRAPH_UTILS_H
+#define DIGRAPH_UTILS_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class DictionaryHeaderStructurePolicy;
+
+class DigraphUtils { // Static-only helpers for digraph lookup.
+ public:
+ typedef enum { // Selects which code point of a digraph is meant.
+ NOT_A_DIGRAPH_INDEX,
+ FIRST_DIGRAPH_CODEPOINT,
+ SECOND_DIGRAPH_CODEPOINT
+ } DigraphCodePointIndex;
+
+ typedef enum { // Digraph families; getDigraphTypeForDictionary() returns one per dictionary header.
+ DIGRAPH_TYPE_NONE,
+ DIGRAPH_TYPE_GERMAN_UMLAUT,
+ } DigraphType;
+
+ typedef struct { int first; int second; int compositeGlyph; } digraph_t; // e.g. { 'a', 'e', 0x00E4 } in digraph_utils.cpp.
+
+ static bool hasDigraphForCodePoint(const DictionaryHeaderStructurePolicy *const headerPolicy, // Uses the digraph type chosen from the header.
+ const int compositeGlyphCodePoint);
+ static int getDigraphCodePointForIndex(const int compositeGlyphCodePoint, // Returns NOT_A_CODE_POINT when no digraph or index applies.
+ const DigraphCodePointIndex digraphCodePointIndex);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils);
+ static DigraphType getDigraphTypeForDictionary(
+ const DictionaryHeaderStructurePolicy *const headerPolicy);
+ static int getAllDigraphsForDigraphTypeAndReturnSize( // Writes the table to *digraphs and returns its size (0 if none).
+ const DigraphType digraphType, const digraph_t **const digraphs);
+ static const digraph_t *getDigraphForCodePoint(const int compositeGlyphCodePoint); // Searches all USED_DIGRAPH_TYPES.
+ static const digraph_t *getDigraphForDigraphTypeAndCodePoint( // Searches one digraph type only.
+ const DigraphType digraphType, const int compositeGlyphCodePoint);
+
+ static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
+ static const DigraphType USED_DIGRAPH_TYPES[];
+};
+} // namespace latinime
+#endif // DIGRAPH_UTILS_H
diff --git a/third_party/android_prediction/suggest/core/dictionary/error_type_utils.cpp b/third_party/android_prediction/suggest/core/dictionary/error_type_utils.cpp
new file mode 100644
index 0000000..90af0b7
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/error_type_utils.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/dictionary/error_type_utils.h"
+
+namespace latinime {
+
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0; // No bit set.
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1; // Each remaining flag is a distinct single bit.
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
+
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH = // Mask of flags that still count as an exact match.
+ NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH; // NOT_AN_ERROR is 0 and adds no bit.
+
+const ErrorTypeUtils::ErrorType
+ ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =
+ ERRORS_TREATED_AS_AN_EXACT_MATCH | INTENTIONAL_OMISSION; // The exact-match mask plus the omission bit.
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/dictionary/error_type_utils.h b/third_party/android_prediction/suggest/core/dictionary/error_type_utils.h
new file mode 100644
index 0000000..ace5a47
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/error_type_utils.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_ERROR_TYPE_UTILS_H
+#define LATINIME_ERROR_TYPE_UTILS_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class ErrorTypeUtils {
+ public:
+ // ErrorType is mainly decided by CorrectionType but it is also depending on if
+ // the correction has really been performed or not.
+ typedef uint32_t ErrorType;
+
+ static const ErrorType NOT_AN_ERROR;
+ static const ErrorType MATCH_WITH_CASE_ERROR;
+ static const ErrorType MATCH_WITH_ACCENT_ERROR;
+ static const ErrorType MATCH_WITH_DIGRAPH;
+ // Treat error as an intentional omission when the CorrectionType is omission and the node can
+ // be intentional omission.
+ static const ErrorType INTENTIONAL_OMISSION;
+ // Substitution, omission and transposition
+ static const ErrorType EDIT_CORRECTION;
+ // Proximity error
+ static const ErrorType PROXIMITY_CORRECTION;
+ // Completion
+ static const ErrorType COMPLETION;
+ // New word
+ // TODO: Remove.
+ // A new word error should be an edit correction error or a proximity correction error.
+ static const ErrorType NEW_WORD;
+ // True when containedErrorTypes has no bit outside ERRORS_TREATED_AS_AN_EXACT_MATCH.
+ static bool isExactMatch(const ErrorType containedErrorTypes) {
+ return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
+ }
+ // Same test as isExactMatch(), against the mask that also includes the omission bit.
+ static bool isExactMatchWithIntentionalOmission(const ErrorType containedErrorTypes) {
+ return (containedErrorTypes
+ & ~ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) == 0;
+ }
+ // True when the EDIT_CORRECTION bit is set in errorType.
+ static bool isEditCorrectionError(const ErrorType errorType) {
+ return (errorType & EDIT_CORRECTION) != 0;
+ }
+ // True when the PROXIMITY_CORRECTION bit is set in errorType.
+ static bool isProximityCorrectionError(const ErrorType errorType) {
+ return (errorType & PROXIMITY_CORRECTION) != 0;
+ }
+ // True when the COMPLETION bit is set in errorType.
+ static bool isCompletion(const ErrorType errorType) {
+ return (errorType & COMPLETION) != 0;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
+
+ static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
+ static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION;
+};
+} // namespace latinime
+#endif // LATINIME_ERROR_TYPE_UTILS_H
diff --git a/third_party/android_prediction/suggest/core/dictionary/multi_bigram_map.cpp b/third_party/android_prediction/suggest/core/dictionary/multi_bigram_map.cpp
new file mode 100644
index 0000000..8ddfbb4
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/multi_bigram_map.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/dictionary/multi_bigram_map.h"
+
+#include <cstddef>
+#include <unordered_map>
+
+namespace latinime {
+
+// Max number of bigram maps (previous word contexts) to be cached. Increasing this number
+// could improve bigram lookup speed for multi-word suggestions, but at the cost of more memory
+// usage. Also, there are diminishing returns since the most frequently used bigrams are
+// typically near the beginning of the input and are thus the first ones to be cached. Note
+// that these bigrams are reset for each new composing word.
+const size_t MultiBigramMap::MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP = 25; // Compared with mBigramMaps.size() in getBigramProbability().
+
+// Most common previous word contexts currently have 100 bigrams
+const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100; // Constructor argument of each BigramMap's unordered_map.
+
+// Computes the probability of the word at nextWordPosition given the previous word, using the
+// per-previous-word cache; a missing map is built on demand while the cache has room.
+int MultiBigramMap::getBigramProbability(
+        const DictionaryStructureWithBufferPolicy *const structurePolicy,
+        const int *const prevWordsPtNodePos, const int nextWordPosition,
+        const int unigramProbability) {
+    if (!prevWordsPtNodePos || prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
+        return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
+    }
+    const int prevWordPos = prevWordsPtNodePos[0];
+    const auto cached = mBigramMaps.find(prevWordPos);
+    if (cached != mBigramMaps.end()) {
+        return cached->second.getBigramProbability(structurePolicy, nextWordPosition,
+                unigramProbability);
+    }
+    if (mBigramMaps.size() >= MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {  // Cache full: do not cache.
+        return readBigramProbabilityFromBinaryDictionary(structurePolicy, prevWordsPtNodePos,
+                nextWordPosition, unigramProbability);
+    }
+    addBigramsForWordPosition(structurePolicy, prevWordsPtNodePos);
+    return mBigramMaps[prevWordPos].getBigramProbability(structurePolicy, nextWordPosition,
+            unigramProbability);
+}
+
+void MultiBigramMap::BigramMap::init(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int *const prevWordsPtNodePos) {
+ structurePolicy->iterateNgramEntries(prevWordsPtNodePos, this /* listener */); // This map is the NgramListener; presumably onVisitEntry() is called per entry (confirm in the policy).
+}
+
+// Combines unigramProbability with this map's bigram probability for nextWordPosition, if any.
+int MultiBigramMap::BigramMap::getBigramProbability(
+        const DictionaryStructureWithBufferPolicy *const structurePolicy,
+        const int nextWordPosition, const int unigramProbability) const {
+    if (mBloomFilter.isInFilter(nextWordPosition)) {  // A negative answer skips the map lookup.
+        const auto entry = mBigramMap.find(nextWordPosition);
+        if (entry != mBigramMap.end()) {
+            return structurePolicy->getProbability(unigramProbability, entry->second);
+        }
+    }
+    // Nothing recorded for this position: pass NOT_A_PROBABILITY as the bigram probability.
+    return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
+}
+
+// NgramListener callback: records one entry in both the hash map and the bloom filter.
+void MultiBigramMap::BigramMap::onVisitEntry(const int ngramProbability,
+        const int targetPtNodePos) {
+    if (targetPtNodePos != NOT_A_DICT_POS) {  // Entries without a target position are ignored.
+        mBigramMap[targetPtNodePos] = ngramProbability;
+        mBloomFilter.setInFilter(targetPtNodePos);
+    }
+}
+
+// Creates (or reuses) the BigramMap keyed by prevWordsPtNodePos[0] and fills it via init().
+void MultiBigramMap::addBigramsForWordPosition(
+        const DictionaryStructureWithBufferPolicy *const structurePolicy,
+        const int *const prevWordsPtNodePos) {
+    if (!prevWordsPtNodePos) return;  // Nothing to cache without previous-word positions.
+    mBigramMaps[prevWordsPtNodePos[0]].init(structurePolicy, prevWordsPtNodePos);
+}
+
+// Uncached lookup: asks the policy for the PtNode probability and falls back to the unigram.
+int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
+        const DictionaryStructureWithBufferPolicy *const structurePolicy,
+        const int *const prevWordsPtNodePos, const int nextWordPosition,
+        const int unigramProbability) {
+    const int fromDictionary =
+            structurePolicy->getProbabilityOfPtNode(prevWordsPtNodePos, nextWordPosition);
+    // A found value is returned as is; otherwise only the unigram probability is used.
+    return fromDictionary != NOT_A_PROBABILITY ? fromDictionary
+            : structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/dictionary/multi_bigram_map.h b/third_party/android_prediction/suggest/core/dictionary/multi_bigram_map.h
new file mode 100644
index 0000000..379ebf6
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/multi_bigram_map.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_MULTI_BIGRAM_MAP_H
+#define LATINIME_MULTI_BIGRAM_MAP_H
+
+#include <cstddef>
+#include <unordered_map>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
+#include "third_party/android_prediction/suggest/core/dictionary/bloom_filter.h"
+#include "third_party/android_prediction/suggest/core/dictionary/ngram_listener.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+
+namespace latinime {
+
+// Class for caching bigram maps for multiple previous word contexts. This is useful since the
+// algorithm needs to look up the set of bigrams for every word pair that occurs in every
+// multi-word suggestion.
+class MultiBigramMap {
+ public:
+ MultiBigramMap() : mBigramMaps() {}
+ ~MultiBigramMap() {}
+
+ // Look up the bigram probability for the given word pair from the cached bigram maps.
+ // Also caches the bigrams if there is space remaining and they have not been cached already.
+ int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int *const prevWordsPtNodePos, const int nextWordPosition,
+ const int unigramProbability);
+
+ void clear() { // Drops every cached BigramMap.
+ mBigramMaps.clear();
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(MultiBigramMap);
+
+ class BigramMap : public NgramListener { // Bigram probabilities for one previous word, keyed by target PtNode position.
+ public:
+ BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
+ // Copy constructor needed for std::unordered_map.
+ BigramMap(const BigramMap &bigramMap)
+ : mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {}
+ virtual ~BigramMap() {}
+ // init() passes this object as the listener to the policy's iterateNgramEntries().
+ void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int *const prevWordsPtNodePos);
+ int getBigramProbability(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int nextWordPosition, const int unigramProbability) const;
+ virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos); // Stores the entry in mBigramMap and mBloomFilter.
+
+ private:
+ static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP;
+ std::unordered_map<int, int> mBigramMap; // target PtNode position -> ngram probability
+ BloomFilter mBloomFilter; // Checked before mBigramMap in getBigramProbability().
+ };
+ // Builds the BigramMap for prevWordsPtNodePos[0]; does nothing when the array pointer is null.
+ void addBigramsForWordPosition(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int *const prevWordsPtNodePos);
+ // Uncached path: reads the probability through the structure policy without touching mBigramMaps.
+ int readBigramProbabilityFromBinaryDictionary(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int *const prevWordsPtNodePos, const int nextWordPosition,
+ const int unigramProbability);
+
+ static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
+ std::unordered_map<int, BigramMap> mBigramMaps; // Keyed by prevWordsPtNodePos[0].
+};
+} // namespace latinime
+#endif // LATINIME_MULTI_BIGRAM_MAP_H
diff --git a/third_party/android_prediction/suggest/core/dictionary/ngram_listener.h b/third_party/android_prediction/suggest/core/dictionary/ngram_listener.h
new file mode 100644
index 0000000..dd04817
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/ngram_listener.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_NGRAM_LISTENER_H
+#define LATINIME_NGRAM_LISTENER_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+/**
+ * Interface to iterate ngram entries.
+ */
+class NgramListener {
+ public:
+    // Receives one visited entry: its ngram probability and its target PtNode position.
+    virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos) = 0;
+    virtual ~NgramListener() {}
+
+ protected:
+    NgramListener() {}
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(NgramListener);
+};
+} // namespace latinime
+#endif /* LATINIME_NGRAM_LISTENER_H */
diff --git a/third_party/android_prediction/suggest/core/dictionary/property/bigram_property.h b/third_party/android_prediction/suggest/core/dictionary/property/bigram_property.h
new file mode 100644
index 0000000..9a8700a
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/property/bigram_property.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_PROPERTY_H
+#define LATINIME_BIGRAM_PROPERTY_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+// TODO: Change to NgramProperty.
+// Holds the data of one bigram entry: the target word's code points together with the
+// probability, timestamp, level and count passed to the constructor.
+class BigramProperty {
+ public:
+    // Stores a copy of *targetCodePoints. The pointer is dereferenced unconditionally, so it
+    // must not be null.
+    BigramProperty(const std::vector<int> *const targetCodePoints,
+            const int probability, const int timestamp, const int level, const int count)
+            : mTargetCodePoints(*targetCodePoints),
+              mProbability(probability),
+              mTimestamp(timestamp),
+              mLevel(level),
+              mCount(count) {}
+
+    // Returns the address of the vector owned by this object.
+    const std::vector<int> *getTargetCodePoints() const { return &mTargetCodePoints; }
+
+    // Plain accessors for the scalar fields.
+    int getProbability() const { return mProbability; }
+
+    int getTimestamp() const { return mTimestamp; }
+
+    int getLevel() const { return mLevel; }
+
+    int getCount() const { return mCount; }
+
+ private:
+    // The implicitly generated copy constructor and assignment operator are kept on purpose so
+    // that instances can be stored in std::vector.
+    DISALLOW_DEFAULT_CONSTRUCTOR(BigramProperty);
+
+    // TODO: Make members const.
+    std::vector<int> mTargetCodePoints;
+    int mProbability;
+    int mTimestamp;
+    int mLevel;
+    int mCount;
+};
+} // namespace latinime
+#endif // LATINIME_BIGRAM_PROPERTY_H
diff --git a/third_party/android_prediction/suggest/core/dictionary/property/unigram_property.h b/third_party/android_prediction/suggest/core/dictionary/property/unigram_property.h
new file mode 100644
index 0000000..be0b79e
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/property/unigram_property.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_UNIGRAM_PROPERTY_H
+#define LATINIME_UNIGRAM_PROPERTY_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+// Unigram attributes of a word: flags, probability, historical information and shortcuts.
+class UnigramProperty {
+ public:
+    // One shortcut of a unigram: the target code points and a probability.
+    class ShortcutProperty {
+     public:
+        // Stores a copy of *targetCodePoints; the pointer is dereferenced unconditionally.
+        ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability)
+                : mTargetCodePoints(*targetCodePoints),
+                  mProbability(probability) {}
+
+        const std::vector<int> *getTargetCodePoints() const { return &mTargetCodePoints; }
+
+        int getProbability() const { return mProbability; }
+
+     private:
+        // The implicitly generated copy constructor and assignment operator are kept on
+        // purpose so that instances can be stored in std::vector.
+        DISALLOW_DEFAULT_CONSTRUCTOR(ShortcutProperty);
+
+        // TODO: Make members const.
+        std::vector<int> mTargetCodePoints;
+        int mProbability;
+    };
+
+    // Builds an empty property: all flags false, NOT_A_PROBABILITY, NOT_A_TIMESTAMP, level and
+    // count of zero, and no shortcuts.
+    UnigramProperty()
+            : mRepresentsBeginningOfSentence(false),
+              mIsNotAWord(false),
+              mIsBlacklisted(false),
+              mProbability(NOT_A_PROBABILITY),
+              mTimestamp(NOT_A_TIMESTAMP),
+              mLevel(0),
+              mCount(0),
+              mShortcuts() {}
+
+    // Stores the given values and a copy of *shortcuts; the pointer is dereferenced
+    // unconditionally.
+    UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+            const bool isBlacklisted, const int probability, const int timestamp, const int level,
+            const int count, const std::vector<ShortcutProperty> *const shortcuts)
+            : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+              mIsNotAWord(isNotAWord),
+              mIsBlacklisted(isBlacklisted),
+              mProbability(probability),
+              mTimestamp(timestamp),
+              mLevel(level),
+              mCount(count),
+              mShortcuts(*shortcuts) {}
+
+    bool representsBeginningOfSentence() const { return mRepresentsBeginningOfSentence; }
+
+    bool isNotAWord() const { return mIsNotAWord; }
+
+    bool isBlacklisted() const { return mIsBlacklisted; }
+
+    // True when at least one shortcut is stored.
+    bool hasShortcuts() const { return !mShortcuts.empty(); }
+
+    int getProbability() const { return mProbability; }
+
+    // Accessors for the historical information.
+    int getTimestamp() const { return mTimestamp; }
+
+    int getLevel() const { return mLevel; }
+
+    int getCount() const { return mCount; }
+
+    const std::vector<ShortcutProperty> &getShortcuts() const { return mShortcuts; }
+
+ private:
+    // The implicitly generated copy constructor is kept so that instances can be returned by
+    // value; only assignment is disallowed.
+    DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
+
+    // TODO: Make members const.
+    bool mRepresentsBeginningOfSentence;
+    bool mIsNotAWord;
+    bool mIsBlacklisted;
+    int mProbability;
+    // Historical information
+    int mTimestamp;
+    int mLevel;
+    int mCount;
+    std::vector<ShortcutProperty> mShortcuts;
+};
+} // namespace latinime
+#endif // LATINIME_UNIGRAM_PROPERTY_H
diff --git a/third_party/android_prediction/suggest/core/dictionary/property/word_property.h b/third_party/android_prediction/suggest/core/dictionary/property/word_property.h
new file mode 100644
index 0000000..9e974e4
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/dictionary/property/word_property.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WORD_PROPERTY_H
+#define LATINIME_WORD_PROPERTY_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/property/bigram_property.h"
+#include "third_party/android_prediction/suggest/core/dictionary/property/unigram_property.h"
+
+namespace latinime {
+
+// This class is used for returning information belonging to a word to java side.
+class WordProperty {
+ public:
+    // Default constructor is used to create an instance that indicates an invalid word.
+    WordProperty() : mCodePoints(), mUnigramProperty(), mBigrams() {}
+
+    // Copies *codePoints, *unigramProperty and *bigrams; all three pointers are dereferenced
+    // unconditionally.
+    WordProperty(const std::vector<int> *const codePoints,
+            const UnigramProperty *const unigramProperty,
+            const std::vector<BigramProperty> *const bigrams)
+            : mCodePoints(*codePoints),
+              mUnigramProperty(*unigramProperty),
+              mBigrams(*bigrams) {}
+
+    // Both accessors return the address of a member owned by this object.
+    const UnigramProperty *getUnigramProperty() const { return &mUnigramProperty; }
+
+    const std::vector<BigramProperty> *getBigramProperties() const { return &mBigrams; }
+
+ private:
+    // The implicitly generated copy constructor is kept so instances can be returned by value.
+    DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
+
+    const std::vector<int> mCodePoints;
+    const UnigramProperty mUnigramProperty;
+    const std::vector<BigramProperty> mBigrams;
+};
+} // namespace latinime
+#endif // LATINIME_WORD_PROPERTY_H
diff --git a/third_party/android_prediction/suggest/core/layout/additional_proximity_chars.cpp b/third_party/android_prediction/suggest/core/layout/additional_proximity_chars.cpp
new file mode 100644
index 0000000..72cffb3
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/additional_proximity_chars.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/layout/additional_proximity_chars.h"
+
+namespace latinime {
+// TODO: Stop using hardcoded additional proximity characters.
+// TODO: Have proximity character information in each language's binary dictionary.
+
+// Locale prefix compared by AdditionalProximityChars::isEnLocale(); note that the value is
+// just "en".
+const char *AdditionalProximityChars::LOCALE_EN_US = "en";
+
+// For each of the vowels a, e, i, o and u the additional characters are the other four
+// vowels.
+const int AdditionalProximityChars::EN_US_ADDITIONAL_A[EN_US_ADDITIONAL_A_SIZE] =
+        { 'e', 'i', 'o', 'u' };
+
+const int AdditionalProximityChars::EN_US_ADDITIONAL_E[EN_US_ADDITIONAL_E_SIZE] =
+        { 'a', 'i', 'o', 'u' };
+
+const int AdditionalProximityChars::EN_US_ADDITIONAL_I[EN_US_ADDITIONAL_I_SIZE] =
+        { 'a', 'e', 'o', 'u' };
+
+const int AdditionalProximityChars::EN_US_ADDITIONAL_O[EN_US_ADDITIONAL_O_SIZE] =
+        { 'a', 'e', 'i', 'u' };
+
+const int AdditionalProximityChars::EN_US_ADDITIONAL_U[EN_US_ADDITIONAL_U_SIZE] =
+        { 'a', 'e', 'i', 'o' };
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/layout/additional_proximity_chars.h b/third_party/android_prediction/suggest/core/layout/additional_proximity_chars.h
new file mode 100644
index 0000000..db95e9f
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/additional_proximity_chars.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_ADDITIONAL_PROXIMITY_CHARS_H
+#define LATINIME_ADDITIONAL_PROXIMITY_CHARS_H
+
+#include <cstring>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+// Hard-coded additional proximity characters, provided only for locales starting with "en".
+class AdditionalProximityChars {
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(AdditionalProximityChars);
+
+    // Lookup tables; their values are defined in additional_proximity_chars.cpp.
+    static const char *LOCALE_EN_US;
+    static const int EN_US_ADDITIONAL_A_SIZE = 4;
+    static const int EN_US_ADDITIONAL_A[];
+    static const int EN_US_ADDITIONAL_E_SIZE = 4;
+    static const int EN_US_ADDITIONAL_E[];
+    static const int EN_US_ADDITIONAL_I_SIZE = 4;
+    static const int EN_US_ADDITIONAL_I[];
+    static const int EN_US_ADDITIONAL_O_SIZE = 4;
+    static const int EN_US_ADDITIONAL_O[];
+    static const int EN_US_ADDITIONAL_U_SIZE = 4;
+    static const int EN_US_ADDITIONAL_U[];
+
+    // Returns true when localeStr is non-null and begins with LOCALE_EN_US.
+    AK_FORCE_INLINE static bool isEnLocale(const char *localeStr) {
+        if (!localeStr) {
+            return false;
+        }
+        // strncmp() stops at the first NUL, so a locale shorter than the prefix can never
+        // compare equal and needs no separate length check.
+        return strncmp(localeStr, LOCALE_EN_US, strlen(LOCALE_EN_US)) == 0;
+    }
+
+ public:
+    // Number of additional proximity characters for code point c. The result is 0 when the
+    // locale is not an "en" locale or when c is not one of a, e, i, o, u.
+    static int getAdditionalCharsSize(const char *const localeStr, const int c) {
+        if (isEnLocale(localeStr)) {
+            switch (c) {
+                case 'a': return EN_US_ADDITIONAL_A_SIZE;
+                case 'e': return EN_US_ADDITIONAL_E_SIZE;
+                case 'i': return EN_US_ADDITIONAL_I_SIZE;
+                case 'o': return EN_US_ADDITIONAL_O_SIZE;
+                case 'u': return EN_US_ADDITIONAL_U_SIZE;
+                default: break;
+            }
+        }
+        return 0;
+    }
+
+    // Additional proximity characters for code point c, or a null pointer when the locale is
+    // not an "en" locale or when c is not one of a, e, i, o, u. The array holds
+    // getAdditionalCharsSize(localeStr, c) entries.
+    static const int *getAdditionalChars(const char *const localeStr, const int c) {
+        if (isEnLocale(localeStr)) {
+            switch (c) {
+                case 'a': return EN_US_ADDITIONAL_A;
+                case 'e': return EN_US_ADDITIONAL_E;
+                case 'i': return EN_US_ADDITIONAL_I;
+                case 'o': return EN_US_ADDITIONAL_O;
+                case 'u': return EN_US_ADDITIONAL_U;
+                default: break;
+            }
+        }
+        return nullptr;
+    }
+};
+} // namespace latinime
+#endif // LATINIME_ADDITIONAL_PROXIMITY_CHARS_H
diff --git a/third_party/android_prediction/suggest/core/layout/geometry_utils.h b/third_party/android_prediction/suggest/core/layout/geometry_utils.h
new file mode 100644
index 0000000..5ead27f
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/geometry_utils.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_GEOMETRY_UTILS_H
+#define LATINIME_GEOMETRY_UTILS_H
+
+#include <cmath>
+
+#include "third_party/android_prediction/defines.h"
+
+#define ROUND_FLOAT_10000(f) (((f) < 1000.0f && (f) > 0.001f) \
+        ? (floorf((f) * 10000.0f) / 10000.0f) : (f))  // Parenthesized: safe in expressions.
+
+namespace latinime {
+
+class GeometryUtils {
+ public:
+    static inline float SQUARE_FLOAT(const float x) { return x * x; }
+
+    // atan2f() angle of the vector from (x2, y2) to (x1, y1); 0 when the two points coincide.
+    static AK_FORCE_INLINE float getAngle(const int x1, const int y1, const int x2, const int y2) {
+        if (x1 == x2 && y1 == y2) return 0.0f;
+        return atan2f(static_cast<float>(y1 - y2), static_cast<float>(x1 - x2));
+    }
+
+    // |a1 - a2|; a difference above pi is replaced by 2 * pi minus that difference.
+    static AK_FORCE_INLINE float getAngleDiff(const float a1, const float a2) {
+        const float absDelta = fabsf(a1 - a2);
+        const float rounded = ROUND_FLOAT_10000(absDelta);
+        if (!(rounded > M_PI_F)) return rounded;  // Negated '>' keeps NaN on this path.
+        const float wrapped = 2.0f * M_PI_F - rounded;
+        return ROUND_FLOAT_10000(wrapped);
+    }
+
+    // Euclidean distance between (x1, y1) and (x2, y2), truncated to int.
+    static AK_FORCE_INLINE int getDistanceInt(const int x1, const int y1, const int x2,
+            const int y2) {
+        const float distance = hypotf(static_cast<float>(x1 - x2), static_cast<float>(y1 - y2));
+        return static_cast<int>(distance);
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(GeometryUtils);
+};
+} // namespace latinime
+#endif // LATINIME_GEOMETRY_UTILS_H
diff --git a/third_party/android_prediction/suggest/core/layout/normal_distribution.h b/third_party/android_prediction/suggest/core/layout/normal_distribution.h
new file mode 100644
index 0000000..ab86b17
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/normal_distribution.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_NORMAL_DISTRIBUTION_H
+#define LATINIME_NORMAL_DISTRIBUTION_H
+
+#include <cmath>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+// Normal distribution N(u, sigma^2).
+class NormalDistribution {
+ public:
+    // Squares are spelled out as products: this header does not include geometry_utils.h.
+    NormalDistribution(const float u, const float sigma)
+            : mU(u),
+              mPreComputedNonExpPart(1.0f / sqrtf(2.0f * M_PI_F * (sigma * sigma))),
+              mPreComputedExponentPart(-1.0f / (2.0f * (sigma * sigma))) {}
+
+    // Density at x: mPreComputedNonExpPart * exp(mPreComputedExponentPart * (x - u)^2).
+    float getProbabilityDensity(const float x) const {
+        const float shiftedX = x - mU;
+        return mPreComputedNonExpPart * expf(mPreComputedExponentPart * (shiftedX * shiftedX));
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(NormalDistribution);
+
+    const float mU; // mean value
+    const float mPreComputedNonExpPart; // = 1 / sqrt(2 * PI * sigma^2)
+    const float mPreComputedExponentPart; // = -1 / (2 * sigma^2)
+};
+} // namespace latinime
+#endif // LATINIME_NORMAL_DISTRIBUTION_H
diff --git a/third_party/android_prediction/suggest/core/layout/normal_distribution_2d.h b/third_party/android_prediction/suggest/core/layout/normal_distribution_2d.h
new file mode 100644
index 0000000..2f8c4da
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/normal_distribution_2d.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_NORMAL_DISTRIBUTION_2D_H
+#define LATINIME_NORMAL_DISTRIBUTION_2D_H
+
+#include <cmath>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/layout/geometry_utils.h"
+#include "third_party/android_prediction/suggest/core/layout/normal_distribution.h"
+
+namespace latinime {
+
+// Zero-covariance normal distribution on a 2D plane that can be rotated by theta.
+class NormalDistribution2D {
+ public:
+    NormalDistribution2D(const float uX, const float sigmaX, const float uY, const float sigmaY,
+            const float theta)
+            : mXDistribution(0.0f, sigmaX), mYDistribution(0.0f, sigmaY),
+              mUX(uX), mUY(uY),
+              mSinTheta(sinf(theta)), mCosTheta(cosf(theta)) {}
+
+    float getProbabilityDensity(const float x, const float y) const {
+        // Translate so that (mUX, mUY) becomes the origin.
+        const float dx = x - mUX;
+        const float dy = y - mUY;
+        // Rotate into the frame of the two zero-mean 1D distributions.
+        const float alongX = mCosTheta * dx + mSinTheta * dy;
+        const float alongY = -mSinTheta * dx + mCosTheta * dy;
+        const float densityX = mXDistribution.getProbabilityDensity(alongX);
+        return densityX * mYDistribution.getProbabilityDensity(alongY);
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(NormalDistribution2D);
+
+    const NormalDistribution mXDistribution;
+    const NormalDistribution mYDistribution;
+    const float mUX;
+    const float mUY;
+    const float mSinTheta;
+    const float mCosTheta;
+};
+} // namespace latinime
+#endif // LATINIME_NORMAL_DISTRIBUTION_2D_H
diff --git a/third_party/android_prediction/suggest/core/layout/proximity_info.cpp b/third_party/android_prediction/suggest/core/layout/proximity_info.cpp
new file mode 100644
index 0000000..ffd977b
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/proximity_info.cpp
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "LatinIME: proximity_info.cpp"
+
+#include "third_party/android_prediction/suggest/core/layout/proximity_info.h"
+
+#include <algorithm>
+#include <cstring>
+#include <cmath>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/layout/additional_proximity_chars.h"
+#include "third_party/android_prediction/suggest/core/layout/geometry_utils.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_params.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+namespace latinime {
+
+static AK_FORCE_INLINE void safeGetOrFillZeroIntArrayRegion(const int *jArray,
+        int len, int *buffer) {
+    if (!buffer) return;  // No destination: nothing to do.
+    if (!jArray) {
+        // No source: zero-fill the destination instead.
+        memset(buffer, 0, len * sizeof(buffer[0]));
+        return;
+    }
+    for (int index = 0; index < len; ++index) buffer[index] = jArray[index];
+}
+
+static AK_FORCE_INLINE void safeGetOrFillZeroFloatArrayRegion(const float *jArray,
+        int len, float *buffer) {
+    if (!buffer) return;  // No destination: nothing to do.
+    if (!jArray) {
+        // No source: zero-fill the destination instead.
+        memset(buffer, 0, len * sizeof(buffer[0]));
+        return;
+    }
+    for (int index = 0; index < len; ++index) buffer[index] = jArray[index];
+}
+
+ProximityInfo::ProximityInfo(const std::string localeJStr,
+        const int keyboardWidth, const int keyboardHeight, const int gridWidth,
+        const int gridHeight, const int mostCommonKeyWidth, const int mostCommonKeyHeight,
+        int *proximityChars, int proximitySize, const int keyCount, const int *keyXCoordinates,
+        const int *keyYCoordinates, const int *keyWidths, const int *keyHeights,
+        const int *keyCharCodes, const float *sweetSpotCenterXs,
+        const float *sweetSpotCenterYs, const float *sweetSpotRadii)
+        : GRID_WIDTH(gridWidth), GRID_HEIGHT(gridHeight), MOST_COMMON_KEY_WIDTH(mostCommonKeyWidth),
+          MOST_COMMON_KEY_WIDTH_SQUARE(mostCommonKeyWidth * mostCommonKeyWidth),
+          NORMALIZED_SQUARED_MOST_COMMON_KEY_HYPOTENUSE(1.0f +
+                  GeometryUtils::SQUARE_FLOAT(static_cast<float>(mostCommonKeyHeight) /
+                          static_cast<float>(mostCommonKeyWidth))),
+          CELL_WIDTH((keyboardWidth + gridWidth - 1) / gridWidth),
+          CELL_HEIGHT((keyboardHeight + gridHeight - 1) / gridHeight),
+          KEY_COUNT(std::min(keyCount, MAX_KEY_COUNT_IN_A_KEYBOARD)),
+          KEYBOARD_WIDTH(keyboardWidth), KEYBOARD_HEIGHT(keyboardHeight),
+          KEYBOARD_HYPOTENUSE(hypotf(KEYBOARD_WIDTH, KEYBOARD_HEIGHT)),
+          HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates
+                  && keyWidths && keyHeights && keyCharCodes && sweetSpotCenterXs
+                  && sweetSpotCenterYs && sweetSpotRadii),
+          mProximityCharsArray(new int[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE]),
+          mLowerCodePointToKeyMap() {
+    // Entry count of mProximityCharsArray; an input of any other length is zero-filled below.
+    const int gridCharsLength = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE;
+    if (proximitySize != gridCharsLength) {
+        AKLOGE("Invalid proximityCharsLength: %d", proximitySize);
+        ASSERT(false);
+    }
+    if (DEBUG_PROXIMITY_INFO) {
+        AKLOGI("Create proximity info array %d", proximitySize);
+    }
+    const int localeCStrUtf8Length = localeJStr.length();
+    if (localeCStrUtf8Length >= MAX_LOCALE_STRING_LENGTH) {
+        AKLOGI("Locale string length too long: length=%d", localeCStrUtf8Length);
+        ASSERT(false);
+    }
+    // Never copy more than fits: an over-long locale is truncated and stays NUL-terminated.
+    memset(mLocaleStr, 0, sizeof(mLocaleStr));
+    const int copyLength = std::min<int>(localeCStrUtf8Length, sizeof(mLocaleStr) - 1);
+    for (int i = 0; i < copyLength; i++) {
+        mLocaleStr[i] = localeJStr[i];
+    }
+    safeGetOrFillZeroIntArrayRegion(proximitySize == gridCharsLength ? proximityChars : nullptr,
+            gridCharsLength, mProximityCharsArray);
+    safeGetOrFillZeroIntArrayRegion(keyXCoordinates, KEY_COUNT, mKeyXCoordinates);
+    safeGetOrFillZeroIntArrayRegion(keyYCoordinates, KEY_COUNT, mKeyYCoordinates);
+    safeGetOrFillZeroIntArrayRegion(keyWidths, KEY_COUNT, mKeyWidths);
+    safeGetOrFillZeroIntArrayRegion(keyHeights, KEY_COUNT, mKeyHeights);
+    safeGetOrFillZeroIntArrayRegion(keyCharCodes, KEY_COUNT, mKeyCodePoints);
+    safeGetOrFillZeroFloatArrayRegion(sweetSpotCenterXs, KEY_COUNT, mSweetSpotCenterXs);
+    safeGetOrFillZeroFloatArrayRegion(sweetSpotCenterYs, KEY_COUNT, mSweetSpotCenterYs);
+    safeGetOrFillZeroFloatArrayRegion(sweetSpotRadii, KEY_COUNT, mSweetSpotRadii);
+    initializeG();
+}
+
+ProximityInfo::~ProximityInfo() {
+ delete[] mProximityCharsArray; // Releases the int array allocated with new[] by the constructor.
+}
+
+bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
+    const bool hasNegativeCoordinate = (x < 0) || (y < 0);
+    if (hasNegativeCoordinate) {
+        if (DEBUG_DICT) {
+            AKLOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
+            // TODO: Enable this assertion.
+            //ASSERT(false);
+        }
+        return false;
+    }
+
+    // Start offset into mProximityCharsArray reported by ProximityInfoUtils for (x, y).
+    const int startIndex = ProximityInfoUtils::getStartIndexFromCoordinates(x, y,
+            CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH);
+    if (DEBUG_PROXIMITY_INFO) {
+        AKLOGI("hasSpaceProximity: index %d, %d, %d", startIndex, x, y);
+    }
+    // Scan the MAX_PROXIMITY_CHARS_SIZE entries starting there for the space key code.
+    for (int offset = 0; offset < MAX_PROXIMITY_CHARS_SIZE; ++offset) {
+        if (DEBUG_PROXIMITY_INFO) {
+            AKLOGI("Index: %d", mProximityCharsArray[startIndex + offset]);
+        }
+        if (mProximityCharsArray[startIndex + offset] == KEYCODE_SPACE) return true;
+    }
+    return false;
+}
+
+float ProximityInfo::getNormalizedSquaredDistanceFromCenterFloatG(
+        const int keyId, const int x, const int y, const bool isGeometric) const {
+    // Squared touch-to-key-center distance divided by the squared most common key width.
+    const float distSq = ProximityInfoUtils::getSquaredDistanceFloat(
+            static_cast<float>(getKeyCenterXOfKeyIdG(keyId, x, isGeometric)),
+            static_cast<float>(getKeyCenterYOfKeyIdG(keyId, y, isGeometric)),
+            static_cast<float>(x), static_cast<float>(y));
+    return distSq / GeometryUtils::SQUARE_FLOAT(static_cast<float>(getMostCommonKeyWidth()));
+}
+
+int ProximityInfo::getCodePointOf(const int keyIndex) const {
+    // Lower-cased code point of the key, or NOT_A_CODE_POINT for an index outside
+    // [0, KEY_COUNT).
+    const bool isValidIndex = keyIndex >= 0 && keyIndex < KEY_COUNT;
+    return isValidIndex ? mKeyIndexToLowerCodePointG[keyIndex] : NOT_A_CODE_POINT;
+}
+
+int ProximityInfo::getOriginalCodePointOf(const int keyIndex) const {
+    // Code point of the key as stored by initializeG() (not lower-cased), or
+    // NOT_A_CODE_POINT for an index outside [0, KEY_COUNT).
+    const bool isValidIndex = keyIndex >= 0 && keyIndex < KEY_COUNT;
+    return isValidIndex ? mKeyIndexToOriginalCodePoint[keyIndex] : NOT_A_CODE_POINT;
+}
+
+void ProximityInfo::initializeG() {
+    // TODO: Optimize
+    for (int key = 0; key < KEY_COUNT; ++key) {
+        const int originalCode = mKeyCodePoints[key];
+        const int lowerCode = CharUtils::toLowerCase(originalCode);
+        mCenterXsG[key] = mKeyXCoordinates[key] + mKeyWidths[key] / 2;
+        mCenterYsG[key] = mKeyYCoordinates[key] + mKeyHeights[key] / 2;
+        if (hasTouchPositionCorrectionData()) {
+            // Sweet spot center Y for geometric input: key center plus the scaled vertical gap.
+            const float scale = ProximityInfoParams::VERTICAL_SWEET_SPOT_SCALE_G;
+            const float sweetY = static_cast<float>(mSweetSpotCenterYs[key]);
+            const float gap = sweetY - mCenterYsG[key];
+            mSweetSpotCenterYsG[key] = static_cast<int>(mCenterYsG[key] + gap * scale);
+        }
+        mLowerCodePointToKeyMap[lowerCode] = key;
+        mKeyIndexToOriginalCodePoint[key] = originalCode;
+        mKeyIndexToLowerCodePointG[key] = lowerCode;
+    }
+    // Symmetric key-to-key distance table with a zero diagonal.
+    for (int first = 0; first < KEY_COUNT; first++) {
+        mKeyKeyDistancesG[first][first] = 0;
+        for (int second = first + 1; second < KEY_COUNT; second++) {
+            // Uses sweet spots (geometric-input Y) when available, key centers otherwise.
+            int distance;
+            if (hasTouchPositionCorrectionData()) {
+                distance = GeometryUtils::getDistanceInt(
+                        mSweetSpotCenterXs[first], mSweetSpotCenterYsG[first],
+                        mSweetSpotCenterXs[second], mSweetSpotCenterYsG[second]);
+            } else {
+                distance = GeometryUtils::getDistanceInt(mCenterXsG[first], mCenterYsG[first],
+                        mCenterXsG[second], mCenterYsG[second]);
+            }
+            mKeyKeyDistancesG[first][second] = distance;
+            mKeyKeyDistancesG[second][first] = distance;
+        }
+    }
+}
+
+// referencePointX is used only for keys wider than most common key width. When the referencePointX
+// is NOT_A_COORDINATE, this method calculates the return value without using the line segment.
+// isGeometric is currently not used because we don't have extra X coordinates sweet spots for
+// geometric input.
+int ProximityInfo::getKeyCenterXOfKeyIdG(
+        const int keyId, const int referencePointX, const bool isGeometric) const {
+    if (keyId < 0 || keyId >= KEY_COUNT) {
+        return 0;  // Unknown key: the per-key tables are only filled for [0, KEY_COUNT).
+    }
+    int centerX = (hasTouchPositionCorrectionData()) ? static_cast<int>(mSweetSpotCenterXs[keyId])
+            : mCenterXsG[keyId];
+    const int keyWidth = mKeyWidths[keyId];
+    if (referencePointX != NOT_A_COORDINATE
+            && keyWidth > getMostCommonKeyWidth()) {
+        // For keys wider than most common keys, we use a line segment instead of the center point;
+        // thus, centerX is adjusted depending on referencePointX.
+        const int keyWidthHalfDiff = (keyWidth - getMostCommonKeyWidth()) / 2;
+        if (referencePointX < centerX - keyWidthHalfDiff) {
+            centerX -= keyWidthHalfDiff;
+        } else if (referencePointX > centerX + keyWidthHalfDiff) {
+            centerX += keyWidthHalfDiff;
+        } else {
+            centerX = referencePointX;
+        }
+    }
+    return centerX;
+}
+
+// When the referencePointY is NOT_A_COORDINATE, this method calculates the return value without
+// using the line segment.
+int ProximityInfo::getKeyCenterYOfKeyIdG(
+        const int keyId, const int referencePointY, const bool isGeometric) const {
+    // TODO: Remove "isGeometric" and have separate "proximity_info"s for gesture and typing.
+    if (keyId < 0 || keyId >= KEY_COUNT) {
+        return 0;  // Unknown key: the per-key tables are only filled for [0, KEY_COUNT).
+    }
+    int centerY;
+    if (!hasTouchPositionCorrectionData()) {
+        centerY = mCenterYsG[keyId];
+    } else if (isGeometric) {
+        centerY = static_cast<int>(mSweetSpotCenterYsG[keyId]);
+    } else {
+        centerY = static_cast<int>(mSweetSpotCenterYs[keyId]);
+    }
+    if (referencePointY != NOT_A_COORDINATE &&
+            centerY + mKeyHeights[keyId] > KEYBOARD_HEIGHT && centerY < referencePointY) {
+        // When the distance between center point and bottom edge of the keyboard is shorter than
+        // the key height, we assume the key is located at the bottom row of the keyboard.
+        // The center point is extended to the bottom edge for such keys.
+        return referencePointY;
+    }
+    return centerY;
+}
+
+int ProximityInfo::getKeyKeyDistanceG(const int keyId0, const int keyId1) const {
+    if (keyId0 < 0 || keyId0 >= KEY_COUNT || keyId1 < 0 || keyId1 >= KEY_COUNT) {
+        return MAX_VALUE_FOR_WEIGHTING;  // Table is only filled for ids in [0, KEY_COUNT).
+    }
+    return mKeyKeyDistancesG[keyId0][keyId1];
+}
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/layout/proximity_info.h b/third_party/android_prediction/suggest/core/layout/proximity_info.h
new file mode 100644
index 0000000..0d3aba4
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/proximity_info.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROXIMITY_INFO_H
+#define LATINIME_PROXIMITY_INFO_H
+
+#include <string>
+#include <unordered_map>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_utils.h"
+
+namespace latinime {
+
+class ProximityInfo {  // Keyboard layout description: dimensions, per-key geometry, sweet spots.
+ public:
+ ProximityInfo(const std::string localeJStr,  // only declared here; defined out of line
+ const int keyboardWidth, const int keyboardHeight, const int gridWidth,
+ const int gridHeight, const int mostCommonKeyWidth, const int mostCommonKeyHeight,
+ int *proximityChars, int proximitySize, const int keyCount, const int *keyXCoordinates,
+ const int *keyYCoordinates, const int *keyWidths, const int *keyHeights,
+ const int *keyCharCodes, const float *sweetSpotCenterXs,
+ const float *sweetSpotCenterYs, const float *sweetSpotRadii);
+ ~ProximityInfo();
+ bool hasSpaceProximity(const int x, const int y) const;
+ float getNormalizedSquaredDistanceFromCenterFloatG(
+ const int keyId, const int x, const int y, const bool isGeometric) const;
+ int getCodePointOf(const int keyIndex) const;
+ int getOriginalCodePointOf(const int keyIndex) const;
+ bool hasSweetSpotData(const int keyIndex) const {
+ // When there are no calibration data for a key,
+ // the radius of the key is assigned to zero.
+ return mSweetSpotRadii[keyIndex] > 0.0f;  // keyIndex is not range-checked here
+ }
+ float getSweetSpotRadiiAt(int keyIndex) const { return mSweetSpotRadii[keyIndex]; }
+ float getSweetSpotCenterXAt(int keyIndex) const { return mSweetSpotCenterXs[keyIndex]; }
+ float getSweetSpotCenterYAt(int keyIndex) const { return mSweetSpotCenterYs[keyIndex]; }
+ bool hasTouchPositionCorrectionData() const { return HAS_TOUCH_POSITION_CORRECTION_DATA; }
+ int getMostCommonKeyWidth() const { return MOST_COMMON_KEY_WIDTH; }
+ int getMostCommonKeyWidthSquare() const { return MOST_COMMON_KEY_WIDTH_SQUARE; }
+ float getNormalizedSquaredMostCommonKeyHypotenuse() const {
+ return NORMALIZED_SQUARED_MOST_COMMON_KEY_HYPOTENUSE;
+ }
+ int getKeyCount() const { return KEY_COUNT; }
+ int getCellHeight() const { return CELL_HEIGHT; }
+ int getCellWidth() const { return CELL_WIDTH; }
+ int getGridWidth() const { return GRID_WIDTH; }
+ int getGridHeight() const { return GRID_HEIGHT; }
+ int getKeyboardWidth() const { return KEYBOARD_WIDTH; }
+ int getKeyboardHeight() const { return KEYBOARD_HEIGHT; }
+ float getKeyboardHypotenuse() const { return KEYBOARD_HYPOTENUSE; }
+ // Defined out of line. NOTE(review): isGeometric appears to pick gesture vs. typing data.
+ int getKeyCenterXOfKeyIdG(
+ const int keyId, const int referencePointX, const bool isGeometric) const;
+ int getKeyCenterYOfKeyIdG(
+ const int keyId, const int referencePointY, const bool isGeometric) const;
+ int getKeyKeyDistanceG(int keyId0, int keyId1) const;
+ // Inline forwarders that hand this layout's tables to ProximityInfoUtils.
+ AK_FORCE_INLINE void initializeProximities(const int *const inputCodes,
+ const int *const inputXCoordinates, const int *const inputYCoordinates,
+ const int inputSize, int *allInputCodes) const {
+ ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates,
+ inputSize, mKeyXCoordinates, mKeyYCoordinates, mKeyWidths, mKeyHeights,
+ mProximityCharsArray, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, MOST_COMMON_KEY_WIDTH,
+ KEY_COUNT, mLocaleStr, &mLowerCodePointToKeyMap, allInputCodes);
+ }
+
+ AK_FORCE_INLINE int getKeyIndexOf(const int c) const {
+ return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mLowerCodePointToKeyMap);
+ }
+
+ AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const {
+ return getKeyIndexOf(codePoint) != NOT_AN_INDEX;  // NOT_AN_INDEX means "no such key"
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo);
+
+ void initializeG();
+ // Data members. C++ initializes them in the order declared here, so do not reorder casually.
+ const int GRID_WIDTH;
+ const int GRID_HEIGHT;
+ const int MOST_COMMON_KEY_WIDTH;
+ const int MOST_COMMON_KEY_WIDTH_SQUARE;
+ const float NORMALIZED_SQUARED_MOST_COMMON_KEY_HYPOTENUSE;
+ const int CELL_WIDTH;
+ const int CELL_HEIGHT;
+ const int KEY_COUNT;
+ const int KEYBOARD_WIDTH;
+ const int KEYBOARD_HEIGHT;
+ const float KEYBOARD_HYPOTENUSE;
+ const bool HAS_TOUCH_POSITION_CORRECTION_DATA;
+ // Assuming locale strings such as en_US, sr-Latn etc.
+ static const int MAX_LOCALE_STRING_LENGTH = 10;
+ char mLocaleStr[MAX_LOCALE_STRING_LENGTH];
+ int *mProximityCharsArray;  // NOTE(review): presumably owned by this object - confirm in .cpp
+ int mKeyXCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyYCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyWidths[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyHeights[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyCodePoints[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ float mSweetSpotCenterXs[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ float mSweetSpotCenterYs[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ // Sweet spots for geometric input. Note that we have extra sweet spots only for Y coordinates.
+ float mSweetSpotCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ std::unordered_map<int, int> mLowerCodePointToKeyMap;  // passed to getKeyIndexOf() lookups
+ int mKeyIndexToOriginalCodePoint[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyIndexToLowerCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD];  // [id0][id1]
+};
+} // namespace latinime
+#endif // LATINIME_PROXIMITY_INFO_H
diff --git a/third_party/android_prediction/suggest/core/layout/proximity_info_params.cpp b/third_party/android_prediction/suggest/core/layout/proximity_info_params.cpp
new file mode 100644
index 0000000..29605f9
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/proximity_info_params.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_params.h"
+
+namespace latinime {
+const float ProximityInfoParams::NOT_A_DISTANCE_FLOAT = -1.0f;
+const int ProximityInfoParams::MIN_DOUBLE_LETTER_BEELINE_SPEED_PERCENTILE = 5;
+const float ProximityInfoParams::VERTICAL_SWEET_SPOT_SCALE = 1.0f;
+const float ProximityInfoParams::VERTICAL_SWEET_SPOT_SCALE_G = 0.5f;
+
+/* Per method constants (grouped by the ProximityInfoStateUtils method named above each group) */
+// Used by ProximityInfoStateUtils::updateNearKeysDistances()
+const float ProximityInfoParams::NEAR_KEY_THRESHOLD_FOR_DISTANCE = 2.0f;
+
+// Used by ProximityInfoStateUtils::isPrevLocalMin()
+const float ProximityInfoParams::MARGIN_FOR_PREV_LOCAL_MIN = 0.01f;
+
+// Used by ProximityInfoStateUtils::getPointScore(). Angle thresholds are fractions of M_PI_F.
+const int ProximityInfoParams::DISTANCE_BASE_SCALE = 100;
+const float ProximityInfoParams::NEAR_KEY_THRESHOLD_FOR_POINT_SCORE = 0.6f;
+const int ProximityInfoParams::CORNER_CHECK_DISTANCE_THRESHOLD_SCALE = 25;
+const float ProximityInfoParams::NOT_LOCALMIN_DISTANCE_SCORE = -1.0f;
+const float ProximityInfoParams::LOCALMIN_DISTANCE_AND_NEAR_TO_KEY_SCORE = 1.0f;
+const float ProximityInfoParams::CORNER_ANGLE_THRESHOLD_FOR_POINT_SCORE = M_PI_F * 2.0f / 3.0f;
+const float ProximityInfoParams::CORNER_SUM_ANGLE_THRESHOLD = M_PI_F / 4.0f;
+const float ProximityInfoParams::CORNER_SCORE = 1.0f;
+
+// Used by ProximityInfoStateUtils::refreshSpeedRates()
+const int ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION = 2;
+
+// Used by ProximityInfoStateUtils::pushTouchPoint()
+const int ProximityInfoParams::LAST_POINT_SKIP_DISTANCE_SCALE = 4;
+
+// Used by ProximityInfoStateUtils::updateAlignPointProbabilities(); angles: deg * M_PI_F / 180.
+const float ProximityInfoParams::MIN_PROBABILITY = 0.000005f;
+const float ProximityInfoParams::MAX_SKIP_PROBABILITY = 0.95f;
+const float ProximityInfoParams::SKIP_FIRST_POINT_PROBABILITY = 0.01f;
+const float ProximityInfoParams::SKIP_LAST_POINT_PROBABILITY = 0.1f;
+const float ProximityInfoParams::MIN_SPEED_RATE_FOR_SKIP_PROBABILITY = 0.15f;
+const float ProximityInfoParams::SPEED_WEIGHT_FOR_SKIP_PROBABILITY = 0.9f;
+const float ProximityInfoParams::SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY = 0.6f;
+const float ProximityInfoParams::NEAREST_DISTANCE_WEIGHT = 0.5f;
+const float ProximityInfoParams::NEAREST_DISTANCE_BIAS = 0.5f;
+const float ProximityInfoParams::NEAREST_DISTANCE_WEIGHT_FOR_LAST = 0.6f;
+const float ProximityInfoParams::NEAREST_DISTANCE_BIAS_FOR_LAST = 0.4f;
+const float ProximityInfoParams::ANGLE_WEIGHT = 0.90f;
+const float ProximityInfoParams::DEEP_CORNER_ANGLE_THRESHOLD = M_PI_F * 60.0f / 180.0f;
+const float ProximityInfoParams::SKIP_DEEP_CORNER_PROBABILITY = 0.1f;
+const float ProximityInfoParams::CORNER_ANGLE_THRESHOLD = M_PI_F * 30.0f / 180.0f;
+const float ProximityInfoParams::STRAIGHT_ANGLE_THRESHOLD = M_PI_F * 15.0f / 180.0f;
+const float ProximityInfoParams::SKIP_CORNER_PROBABILITY = 0.4f;
+const float ProximityInfoParams::SPEED_MARGIN = 0.1f;
+const float ProximityInfoParams::CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION = 0.0f;
+// TODO: The variance is critical for accuracy; thus, adjusting these parameters by machine
+// learning or something would be efficient.
+const float ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION = 0.3f;
+const float ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION = 0.25f;
+const float ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION = 0.5f;
+const float ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION = 0.15f;
+const float ProximityInfoParams::MIN_STANDARD_DEVIATION = 0.37f;
+const float ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_FIRST = 1.25f;
+const float ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_FIRST = 0.85f;
+const float ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_LAST = 1.4f;
+const float ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_LAST = 0.95f;
+const float ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT = 1.1f;
+const float ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT = 0.95f;
+
+// Used by ProximityInfoStateUtils::suppressCharProbabilities() ("PROBABALITY" [sic] as declared)
+const float ProximityInfoParams::SUPPRESSION_LENGTH_WEIGHT = 1.5f;
+const float ProximityInfoParams::MIN_SUPPRESSION_RATE = 0.1f;
+const float ProximityInfoParams::SUPPRESSION_WEIGHT = 0.5f;
+const float ProximityInfoParams::SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN = 0.1f;
+const float ProximityInfoParams::SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN = 0.3f;
+
+// Used by ProximityInfoStateUtils::getMostProbableString()
+const float ProximityInfoParams::DEMOTION_LOG_PROBABILITY = 0.3f;
+
+// Used by ProximityInfoStateUtils::updateSampledSearchKeySets()
+// TODO: Investigate if this is required
+const float ProximityInfoParams::SEARCH_KEY_RADIUS_RATIO = 0.95f;
+
+// Used by ProximityInfoStateUtils::calculateBeelineSpeedRate()
+const int ProximityInfoParams::LOOKUP_RADIUS_PERCENTILE = 50;
+const int ProximityInfoParams::FIRST_POINT_TIME_OFFSET_MILLIS = 150;
+const int ProximityInfoParams::STRONG_DOUBLE_LETTER_TIME_MILLIS = 600;
+// NOTE(review): header-declared NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR is not defined here.
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/layout/proximity_info_params.h b/third_party/android_prediction/suggest/core/layout/proximity_info_params.h
new file mode 100644
index 0000000..a5a7aac
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/proximity_info_params.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROXIMITY_INFO_PARAMS_H
+#define LATINIME_PROXIMITY_INFO_PARAMS_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class ProximityInfoParams {  // Tuning constants; values are set in proximity_info_params.cpp.
+ public:
+ static const float NOT_A_DISTANCE_FLOAT;
+ static const int MIN_DOUBLE_LETTER_BEELINE_SPEED_PERCENTILE;
+ static const float VERTICAL_SWEET_SPOT_SCALE;
+ static const float VERTICAL_SWEET_SPOT_SCALE_G;
+
+ // Used by ProximityInfoStateUtils::updateNearKeysDistances()
+ static const float NEAR_KEY_THRESHOLD_FOR_DISTANCE;
+
+ // Used by ProximityInfoStateUtils::isPrevLocalMin()
+ static const float MARGIN_FOR_PREV_LOCAL_MIN;
+
+ // Used by ProximityInfoStateUtils::getPointScore()
+ static const int DISTANCE_BASE_SCALE;
+ static const float NEAR_KEY_THRESHOLD_FOR_POINT_SCORE;
+ static const int CORNER_CHECK_DISTANCE_THRESHOLD_SCALE;
+ static const float NOT_LOCALMIN_DISTANCE_SCORE;
+ static const float LOCALMIN_DISTANCE_AND_NEAR_TO_KEY_SCORE;
+ static const float CORNER_ANGLE_THRESHOLD_FOR_POINT_SCORE;
+ static const float CORNER_SUM_ANGLE_THRESHOLD;
+ static const float CORNER_SCORE;
+
+ // Used by ProximityInfoStateUtils::refreshSpeedRates()
+ static const int NUM_POINTS_FOR_SPEED_CALCULATION;
+
+ // Used by ProximityInfoStateUtils::pushTouchPoint()
+ static const int LAST_POINT_SKIP_DISTANCE_SCALE;
+
+ // Used by ProximityInfoStateUtils::updateAlignPointProbabilities()
+ static const float MIN_PROBABILITY;
+ static const float MAX_SKIP_PROBABILITY;
+ static const float SKIP_FIRST_POINT_PROBABILITY;
+ static const float SKIP_LAST_POINT_PROBABILITY;
+ static const float MIN_SPEED_RATE_FOR_SKIP_PROBABILITY;
+ static const float SPEED_WEIGHT_FOR_SKIP_PROBABILITY;
+ static const float SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY;
+ static const float NEAREST_DISTANCE_WEIGHT;
+ static const float NEAREST_DISTANCE_BIAS;
+ static const float NEAREST_DISTANCE_WEIGHT_FOR_LAST;
+ static const float NEAREST_DISTANCE_BIAS_FOR_LAST;
+ static const float ANGLE_WEIGHT;
+ static const float DEEP_CORNER_ANGLE_THRESHOLD;
+ static const float SKIP_DEEP_CORNER_PROBABILITY;
+ static const float CORNER_ANGLE_THRESHOLD;
+ static const float STRAIGHT_ANGLE_THRESHOLD;
+ static const float SKIP_CORNER_PROBABILITY;
+ static const float SPEED_MARGIN;
+ static const float CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION;
+ static const float SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION;
+ static const float MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION;
+ static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION;
+ static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION;
+ static const float MIN_STANDARD_DEVIATION;
+ // X means gesture's direction. Y means gesture's orthogonal direction.
+ static const float STANDARD_DEVIATION_X_WEIGHT_FOR_FIRST;
+ static const float STANDARD_DEVIATION_Y_WEIGHT_FOR_FIRST;
+ static const float STANDARD_DEVIATION_X_WEIGHT_FOR_LAST;
+ static const float STANDARD_DEVIATION_Y_WEIGHT_FOR_LAST;
+ static const float STANDARD_DEVIATION_X_WEIGHT;
+ static const float STANDARD_DEVIATION_Y_WEIGHT;
+
+ // Used by ProximityInfoStateUtils::suppressCharProbabilities()
+ static const float SUPPRESSION_LENGTH_WEIGHT;
+ static const float MIN_SUPPRESSION_RATE;
+ static const float SUPPRESSION_WEIGHT;
+ static const float SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN;
+ static const float SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN;  // "PROBABALITY" [sic]
+
+ // Used by ProximityInfoStateUtils::getMostProbableString()
+ static const float DEMOTION_LOG_PROBABILITY;
+
+ // Used by ProximityInfoStateUtils::updateSampledSearchKeySets()
+ static const float SEARCH_KEY_RADIUS_RATIO;
+
+ // Used by ProximityInfoStateUtils::calculateBeelineSpeedRate()
+ static const int LOOKUP_RADIUS_PERCENTILE;
+ static const int FIRST_POINT_TIME_OFFSET_MILLIS;
+ static const int STRONG_DOUBLE_LETTER_TIME_MILLIS;
+
+ // Used by ProximityInfoStateUtils::calculateNormalizedSquaredDistance()
+ static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
+ // NOTE(review): proximity_info_params.cpp has no definition for the constant above - confirm.
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfoParams);
+};
+} // namespace latinime
+#endif // LATINIME_PROXIMITY_INFO_PARAMS_H
diff --git a/third_party/android_prediction/suggest/core/layout/proximity_info_state.cpp b/third_party/android_prediction/suggest/core/layout/proximity_info_state.cpp
new file mode 100644
index 0000000..d289919
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/proximity_info_state.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "LatinIME: proximity_info_state.cpp"
+
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_state.h"
+
+#include <algorithm>
+#include <cstring> // for memset() and memmove()
+#include <sstream> // for debug prints
+#include <unordered_map>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/layout/geometry_utils.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_state_utils.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+namespace latinime {
+
+int ProximityInfoState::getPrimaryOriginalCodePointAt(const int index) const {
+    // Primary code point -> key index -> code point originally assigned to that key.
+    const int keyId = mProximityInfo->getKeyIndexOf(getPrimaryCodePointAt(index));
+    return mProximityInfo->getOriginalCodePointOf(keyId);
+}
+
+// Rebuilds this state from |inputSize| raw input points belonging to |pointerId|. Sampled
+// points of the previous call are reused when the input mode (|isGeometric|) is unchanged and
+// continuous suggestion is reported possible; otherwise all sampled data is cleared first.
+// Touch points are sampled only when both |xCoordinates| and |yCoordinates| are non-null.
+// TODO: Remove the dependency of "isGeometric"
+void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
+        const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
+        const int *const xCoordinates, const int *const yCoordinates, const int *const times,
+        const int *const pointerIds, const bool isGeometric) {
+    ASSERT(isGeometric || (inputSize < MAX_WORD_LENGTH));
+    // This check reads the previous call's samples, so it must precede the resets below.
+    mIsContinuousSuggestionPossible = (mHasBeenUpdatedByGeometricInput != isGeometric) ?
+            false : ProximityInfoStateUtils::checkAndReturnIsContinuousSuggestionPossible(
+                    inputSize, xCoordinates, yCoordinates, times, mSampledInputSize,
+                    &mSampledInputXs, &mSampledInputYs, &mSampledTimes, &mSampledInputIndice);
+    if (DEBUG_DICT) {
+        AKLOGI("isContinuousSuggestionPossible = %s",
+                (mIsContinuousSuggestionPossible ? "true" : "false"));
+    }
+
+    mProximityInfo = proximityInfo;
+    mHasTouchPositionCorrectionData = proximityInfo->hasTouchPositionCorrectionData();
+    mMostCommonKeyWidthSquare = proximityInfo->getMostCommonKeyWidthSquare();
+    mKeyCount = proximityInfo->getKeyCount();
+    mCellHeight = proximityInfo->getCellHeight();
+    mCellWidth = proximityInfo->getCellWidth();
+    mGridHeight = proximityInfo->getGridHeight();
+    mGridWidth = proximityInfo->getGridWidth();
+
+    // Reset the proximity table; it is only refilled for non-geometric input on pointer 0.
+    memset(mInputProximities, 0, sizeof(mInputProximities));
+    if (!isGeometric && pointerId == 0) {
+        mProximityInfo->initializeProximities(inputCodes, xCoordinates, yCoordinates,
+                inputSize, mInputProximities);
+    }
+
+    // Setup touch points
+    int pushTouchPointStartIndex = 0;
+    int lastSavedInputSize = 0;
+    mMaxPointToKeyLength = maxPointToKeyLength;
+    mSampledInputSize = 0;
+    mMostProbableStringProbability = 0.0f;
+
+    if (mIsContinuousSuggestionPossible && mSampledInputIndice.size() > 1) {
+        // Just update the difference. The previous two points are never skipped, so pop them.
+        pushTouchPointStartIndex = ProximityInfoStateUtils::trimLastTwoTouchPoints(
+                &mSampledInputXs, &mSampledInputYs, &mSampledTimes, &mSampledLengthCache,
+                &mSampledInputIndice);
+        lastSavedInputSize = mSampledInputXs.size();
+    } else {
+        // Clear all data.
+        mSampledInputXs.clear();
+        mSampledInputYs.clear();
+        mSampledTimes.clear();
+        mSampledInputIndice.clear();
+        mSampledLengthCache.clear();
+        mSampledNormalizedSquaredLengthCache.clear();
+        mSampledSearchKeySets.clear();
+        mSpeedRates.clear();
+        mBeelineSpeedPercentiles.clear();
+        mCharProbabilities.clear();
+        mDirections.clear();
+    }
+
+    if (DEBUG_GEO_FULL) {
+        AKLOGI("Init ProximityInfoState: reused points = %d, last input size = %d",
+                pushTouchPointStartIndex, lastSavedInputSize);
+    }
+
+    if (xCoordinates && yCoordinates) {
+        mSampledInputSize = ProximityInfoStateUtils::updateTouchPoints(mProximityInfo,
+                mMaxPointToKeyLength, mInputProximities, xCoordinates, yCoordinates, times,
+                pointerIds, inputSize, isGeometric, pointerId,
+                pushTouchPointStartIndex, &mSampledInputXs, &mSampledInputYs, &mSampledTimes,
+                &mSampledLengthCache, &mSampledInputIndice);
+    }
+
+    if (mSampledInputSize > 0 && isGeometric) {
+        mAverageSpeed = ProximityInfoStateUtils::refreshSpeedRates(inputSize, xCoordinates,
+                yCoordinates, times, lastSavedInputSize, mSampledInputSize, &mSampledInputXs,
+                &mSampledInputYs, &mSampledTimes, &mSampledLengthCache, &mSampledInputIndice,
+                &mSpeedRates, &mDirections);
+        ProximityInfoStateUtils::refreshBeelineSpeedRates(mProximityInfo->getMostCommonKeyWidth(),
+                mAverageSpeed, inputSize, xCoordinates, yCoordinates, times, mSampledInputSize,
+                &mSampledInputXs, &mSampledInputYs, &mSampledInputIndice,
+                &mBeelineSpeedPercentiles);
+    }
+
+    if (mSampledInputSize > 0) {
+        ProximityInfoStateUtils::initGeometricDistanceInfos(mProximityInfo, mSampledInputSize,
+                lastSavedInputSize, isGeometric, &mSampledInputXs, &mSampledInputYs,
+                &mSampledNormalizedSquaredLengthCache);
+        if (isGeometric) {
+            // updates probabilities of skipping or mapping each key for all points.
+            ProximityInfoStateUtils::updateAlignPointProbabilities(
+                    mMaxPointToKeyLength, mProximityInfo->getMostCommonKeyWidth(),
+                    mProximityInfo->getKeyCount(), lastSavedInputSize, mSampledInputSize,
+                    &mSampledInputXs, &mSampledInputYs, &mSpeedRates, &mSampledLengthCache,
+                    &mSampledNormalizedSquaredLengthCache, mProximityInfo, &mCharProbabilities);
+            ProximityInfoStateUtils::updateSampledSearchKeySets(mProximityInfo,
+                    mSampledInputSize, lastSavedInputSize, &mSampledLengthCache,
+                    &mCharProbabilities, &mSampledSearchKeySets,
+                    &mSampledSearchKeyVectors);
+            mMostProbableStringProbability = ProximityInfoStateUtils::getMostProbableString(
+                    mProximityInfo, mSampledInputSize, &mCharProbabilities, mMostProbableString);
+        }
+    }
+
+    if (DEBUG_SAMPLING_POINTS) {
+        ProximityInfoStateUtils::dump(isGeometric, inputSize, xCoordinates, yCoordinates,
+                mSampledInputSize, &mSampledInputXs, &mSampledInputYs, &mSampledTimes, &mSpeedRates,
+                &mBeelineSpeedPercentiles);
+    }
+
+    mTouchPositionCorrectionEnabled = mSampledInputSize > 0 && mHasTouchPositionCorrectionData
+            && xCoordinates && yCoordinates;
+    if (!isGeometric && pointerId == 0) {
+        ProximityInfoStateUtils::initPrimaryInputWord(
+                inputSize, mInputProximities, mPrimaryInputWord);
+    }
+    if (DEBUG_GEO_FULL) {
+        AKLOGI("ProximityState init finished: %d points out of %d", mSampledInputSize, inputSize);
+    }
+    mHasBeenUpdatedByGeometricInput = isGeometric;
+}
+
+// This function basically converts a point-to-key length into an edit-distance-like value, so
+// comparing the result with mMaxPointToKeyLength (as done below) is known to be questionable.
+float ProximityInfoState::getPointToKeyLength(
+        const int inputIndex, const int codePoint) const {
+    const int keyId = mProximityInfo->getKeyIndexOf(codePoint);
+    if (keyId == NOT_AN_INDEX) {
+        // Not a key on the keyboard: an intentional omission costs nothing, any other code
+        // point gets the max length.
+        return CharUtils::isIntentionalOmissionCodePoint(codePoint)
+                ? 0.0f : static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+    }
+    // The cache holds getKeyCount() entries per input index; the result is capped at the limit.
+    const int cacheIndex = inputIndex * mProximityInfo->getKeyCount() + keyId;
+    return std::min(mSampledNormalizedSquaredLengthCache[cacheIndex], mMaxPointToKeyLength);
+}
+
+float ProximityInfoState::getPointToKeyByIdLength(const int inputIndex, const int keyId) const {
+    // Forwards to the shared utility with this state's length cache and length limit.
+    const int keyCount = mProximityInfo->getKeyCount();
+    return ProximityInfoStateUtils::getPointToKeyByIdLength(mMaxPointToKeyLength,
+            &mSampledNormalizedSquaredLengthCache, keyCount, inputIndex, keyId);
+}
+
+// Classifies |codePoint| (a character of the dictionary word being examined) against the
+// proximity code points recorded for input position |index|. The first entry of that list is
+// what the user actually typed; nearby keys follow, then optionally a delimiter and a list of
+// additional proximity characters. On a nearby/additional match the position within the list
+// is stored through |proximityIndex| when it is non-null.
+// Notice : accented characters do not have a proximity list, so they are alone in their list.
+// The non-accented version of the character should be considered "close", but not the other
+// keys close to the non-accented version.
+ProximityType ProximityInfoState::getProximityType(const int index, const int codePoint,
+        const bool checkProximityChars, int *proximityIndex) const {
+    const int *const proximityCodePoints = getProximityCodePointsAt(index);
+    const int typedCodePoint = proximityCodePoints[0];
+    const int baseLowerC = CharUtils::toBaseLowerCase(codePoint);
+
+    // An exact hit on what the user typed (raw or base-lowercased) is a plain match.
+    if (typedCodePoint == codePoint || typedCodePoint == baseLowerC) {
+        return MATCH_CHAR;
+    }
+
+    if (!checkProximityChars) {
+        return SUBSTITUTION_CHAR;
+    }
+
+    // The typed character was accented and |codePoint| is its non-accented, lowercased form:
+    // treat it as a close character.
+    if (baseLowerC == CharUtils::toBaseLowerCase(typedCodePoint)) {
+        return PROXIMITY_CHAR;
+    }
+
+    // Walks one run of the list starting at *position and stops at the first entry that is not
+    // greater than the delimiter code (or at the end of the list). Returns true on a match,
+    // leaving *position on the matching entry; otherwise *position is where the run ended.
+    const auto scanRun = [&](int *const position) -> bool {
+        while (*position < MAX_PROXIMITY_CHARS_SIZE
+                && proximityCodePoints[*position] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
+            const int candidate = proximityCodePoints[*position];
+            if (candidate == baseLowerC || candidate == codePoint) {
+                if (proximityIndex) {
+                    *proximityIndex = *position;
+                }
+                return true;
+            }
+            ++*position;
+        }
+        return false;
+    };
+
+    // Not an exact nor an accent-alike match: search the list of close keys first.
+    int position = 1;
+    if (scanRun(&position)) {
+        return PROXIMITY_CHAR;
+    }
+    // The close keys may be followed by a delimiter and the additional proximity characters.
+    if (position < MAX_PROXIMITY_CHARS_SIZE
+            && proximityCodePoints[position] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
+        ++position;
+        if (scanRun(&position)) {
+            return ADDITIONAL_PROXIMITY_CHAR;
+        }
+    }
+    // Was not included, signal this as a substitution character.
+    return SUBSTITUTION_CHAR;
+}
+
+ProximityType ProximityInfoState::getProximityTypeG(const int index, const int codePoint) const {
+    if (!isUsed()) return UNRELATED_CHAR;
+    const int vectorCount = static_cast<int>(mSampledSearchKeyVectors.size());
+    if (index < 0 || index >= vectorCount) {
+        AKLOGE("getProximityTypeG() is called with an invalid index(%d). "
+                "mSampledSearchKeyVectors.size() = %d, codePoint = %x.", index,
+                vectorCount, codePoint);
+        ASSERT(false);
+        return UNRELATED_CHAR;
+    }
+    // A search key matches when it equals the lowercased code point or its base code point.
+    const int lowerCodePoint = CharUtils::toLowerCase(codePoint);
+    const int baseLowerCodePoint = CharUtils::toBaseCodePoint(lowerCodePoint);
+    const auto &searchKeys = mSampledSearchKeyVectors[index];
+    for (int i = 0; i < static_cast<int>(searchKeys.size()); ++i) {
+        const auto &key = searchKeys[i];
+        if (key == lowerCodePoint || key == baseLowerCodePoint) {
+            return MATCH_CHAR;
+        }
+    }
+    return UNRELATED_CHAR;
+}
+
+bool ProximityInfoState::isKeyInSerchKeysAfterIndex(const int index, const int keyId) const {
+ ASSERT(keyId >= 0 && index >= 0 && index < mSampledInputSize);
+ return mSampledSearchKeySets[index].test(keyId);  // tests entry |keyId| of the set at |index|
+}
+
+float ProximityInfoState::getDirection(const int index0, const int index1) const {
+ return ProximityInfoStateUtils::getDirection(  // forwards the sampled X/Y arrays and indices
+ &mSampledInputXs, &mSampledInputYs, index0, index1);
+}
+
+float ProximityInfoState::getMostProbableString(int *const codePointBuf) const {
+ memmove(codePointBuf, mMostProbableString, sizeof(mMostProbableString));  // copies whole buffer
+ return mMostProbableStringProbability;  // probability stored alongside the string
+}
+
+bool ProximityInfoState::hasSpaceProximity(const int index) const {
+ ASSERT(0 <= index && index < mSampledInputSize);
+ return mProximityInfo->hasSpaceProximity(getInputX(index), getInputY(index));  // layout query
+}
+
+// Looks up the probability of mapping sampled point |index| to |keyIndex|; when no entry
+// exists for that key, MAX_VALUE_FOR_WEIGHTING is returned instead.
+float ProximityInfoState::getProbability(const int index, const int keyIndex) const {
+    ASSERT(0 <= index && index < mSampledInputSize);
+    const std::unordered_map<int, float> &probabilities = mCharProbabilities[index];
+    const std::unordered_map<int, float>::const_iterator entry = probabilities.find(keyIndex);
+    return (entry == probabilities.end())
+            ? static_cast<float>(MAX_VALUE_FOR_WEIGHTING) : entry->second;
+}
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/layout/proximity_info_state.h b/third_party/android_prediction/suggest/core/layout/proximity_info_state.h
new file mode 100644
index 0000000..7068f0b
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/proximity_info_state.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROXIMITY_INFO_STATE_H
+#define LATINIME_PROXIMITY_INFO_STATE_H
+
+#include <cstring> // for memset()
+#include <unordered_map>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_params.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_state_utils.h"
+
+namespace latinime {
+
+class ProximityInfo;
+
+class ProximityInfoState {  // sampled input points plus per-position proximity code points
+ public:
+    /////////////////////////////////////////
+    // Defined in proximity_info_state.cpp //
+    /////////////////////////////////////////
+    void initInputParams(const int pointerId, const float maxPointToKeyLength,
+            const ProximityInfo *proximityInfo, const int *const inputCodes,
+            const int inputSize, const int *xCoordinates, const int *yCoordinates,
+            const int *const times, const int *const pointerIds, const bool isGeometric);
+
+    /////////////////////////////////////////
+    // Defined here                        //
+    /////////////////////////////////////////
+    AK_FORCE_INLINE ProximityInfoState()
+            : mProximityInfo(nullptr), mMaxPointToKeyLength(0.0f), mAverageSpeed(0.0f),
+              mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0),
+              mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0),
+              mIsContinuousSuggestionPossible(false), mHasBeenUpdatedByGeometricInput(false),
+              mSampledInputXs(), mSampledInputYs(), mSampledTimes(), mSampledInputIndice(),
+              mSampledLengthCache(), mBeelineSpeedPercentiles(),
+              mSampledNormalizedSquaredLengthCache(), mSpeedRates(), mDirections(),
+              mCharProbabilities(), mSampledSearchKeySets(), mSampledSearchKeyVectors(),
+              mTouchPositionCorrectionEnabled(false), mSampledInputSize(0),
+              mMostProbableStringProbability(0.0f) {
+        memset(mInputProximities, 0, sizeof(mInputProximities));
+        memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord));
+        memset(mMostProbableString, 0, sizeof(mMostProbableString));
+    }
+
+    // Non virtual inline destructor -- never inherit this class
+    AK_FORCE_INLINE ~ProximityInfoState() {}
+
+    inline int getPrimaryCodePointAt(const int index) const {
+        return getProximityCodePointsAt(index)[0];  // first entry of the slot for |index|
+    }
+
+    int getPrimaryOriginalCodePointAt(const int index) const;
+
+    inline bool sameAsTyped(const int *word, int length) const {  // word == primary code points?
+        if (length != mSampledInputSize) {
+            return false;
+        }
+        const int *inputProximities = mInputProximities;  // primary code point of slot 0
+        while (length--) {
+            if (*inputProximities != *word) {
+                return false;
+            }
+            inputProximities += MAX_PROXIMITY_CHARS_SIZE;  // primary code point of the next slot
+            word++;
+        }
+        return true;
+    }
+
+    AK_FORCE_INLINE bool existsCodePointInProximityAt(const int index, const int c) const {
+        const int *codePoints = getProximityCodePointsAt(index);
+        // Test the bound before reading codePoints[i] so a full slot is never read past its end.
+        for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE && codePoints[i] > 0; ++i) {
+            if (codePoints[i] == c) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    AK_FORCE_INLINE bool existsAdjacentProximityChars(const int index) const {
+        if (index < 0 || index >= mSampledInputSize) return false;
+        const int currentCodePoint = getPrimaryCodePointAt(index);
+        const int leftIndex = index - 1;  // is this code point a proximity char of a neighbour?
+        if (leftIndex >= 0 && existsCodePointInProximityAt(leftIndex, currentCodePoint)) {
+            return true;
+        }
+        const int rightIndex = index + 1;
+        if (rightIndex < mSampledInputSize
+                && existsCodePointInProximityAt(rightIndex, currentCodePoint)) {
+            return true;
+        }
+        return false;
+    }
+
+    inline bool touchPositionCorrectionEnabled() const {
+        return mTouchPositionCorrectionEnabled;
+    }
+
+    bool isUsed() const {
+        return mSampledInputSize > 0;
+    }
+
+    int size() const {
+        return mSampledInputSize;
+    }
+
+    int getInputX(const int index) const {
+        return mSampledInputXs[index];  // unchecked: index must be a valid sampled-point index
+    }
+
+    int getInputY(const int index) const {
+        return mSampledInputYs[index];  // unchecked, as above
+    }
+
+    int getInputIndexOfSampledPoint(const int sampledIndex) const {
+        return mSampledInputIndice[sampledIndex];
+    }
+
+    bool hasSpaceProximity(const int index) const;
+
+    int getLengthCache(const int index) const {
+        return mSampledLengthCache[index];
+    }
+
+    bool isContinuousSuggestionPossible() const {
+        return mIsContinuousSuggestionPossible;
+    }
+
+    // TODO: Rename s/Length/NormalizedSquaredLength/
+    float getPointToKeyByIdLength(const int inputIndex, const int keyId) const;
+    // TODO: Rename s/Length/NormalizedSquaredLength/
+    float getPointToKeyLength(const int inputIndex, const int codePoint) const;
+
+    ProximityType getProximityType(const int index, const int codePoint,
+            const bool checkProximityChars, int *proximityIndex = nullptr) const;
+
+    ProximityType getProximityTypeG(const int index, const int codePoint) const;
+
+    float getSpeedRate(const int index) const {
+        return mSpeedRates[index];
+    }
+
+    AK_FORCE_INLINE int getBeelineSpeedPercentile(const int id) const {
+        return mBeelineSpeedPercentiles[id];
+    }
+
+    AK_FORCE_INLINE DoubleLetterLevel getDoubleLetterLevel(const int id) const {
+        const int beelineSpeedRate = getBeelineSpeedPercentile(id);
+        if (beelineSpeedRate == 0) {
+            return A_STRONG_DOUBLE_LETTER;
+        } else if (beelineSpeedRate
+                < ProximityInfoParams::MIN_DOUBLE_LETTER_BEELINE_SPEED_PERCENTILE) {
+            return A_DOUBLE_LETTER;
+        } else {
+            return NOT_A_DOUBLE_LETTER;
+        }
+    }
+
+    float getDirection(const int index) const {
+        return mDirections[index];
+    }
+    // Direction between two sampled points; both arguments are sampled-point indices.
+    float getDirection(const int index0, const int index1) const;
+
+    float getMostProbableString(int *const codePointBuf) const;
+
+    float getProbability(const int index, const int keyIndex) const;
+
+    bool isKeyInSerchKeysAfterIndex(const int index, const int keyId) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
+
+    inline const int *getProximityCodePointsAt(const int index) const {
+        return ProximityInfoStateUtils::getProximityCodePointsAt(mInputProximities, index);
+    }
+
+    // const
+    const ProximityInfo *mProximityInfo;
+    float mMaxPointToKeyLength;
+    float mAverageSpeed;
+    bool mHasTouchPositionCorrectionData;
+    int mMostCommonKeyWidthSquare;
+    int mKeyCount;
+    int mCellHeight;
+    int mCellWidth;
+    int mGridHeight;
+    int mGridWidth;
+    bool mIsContinuousSuggestionPossible;
+    bool mHasBeenUpdatedByGeometricInput;
+
+    std::vector<int> mSampledInputXs;
+    std::vector<int> mSampledInputYs;
+    std::vector<int> mSampledTimes;
+    std::vector<int> mSampledInputIndice;
+    std::vector<int> mSampledLengthCache;
+    std::vector<int> mBeelineSpeedPercentiles;
+    std::vector<float> mSampledNormalizedSquaredLengthCache;
+    std::vector<float> mSpeedRates;
+    std::vector<float> mDirections;
+    // probabilities of skipping or mapping to a key for each point.
+    std::vector<std::unordered_map<int, float>> mCharProbabilities;
+    // The vector for the key code set which holds nearby keys of some trailing sampled input points
+    // for each sampled input point. These nearby keys contain the next characters which can be in
+    // the dictionary. Specifically, currently we are looking for keys nearby trailing sampled
+    // inputs including the current input point.
+    std::vector<ProximityInfoStateUtils::NearKeycodesSet> mSampledSearchKeySets;
+    std::vector<std::vector<int>> mSampledSearchKeyVectors;
+    bool mTouchPositionCorrectionEnabled;
+    int mInputProximities[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH];  // one slot per position
+    int mSampledInputSize;
+    int mPrimaryInputWord[MAX_WORD_LENGTH];
+    float mMostProbableStringProbability;
+    int mMostProbableString[MAX_WORD_LENGTH];
+};
+} // namespace latinime
+#endif // LATINIME_PROXIMITY_INFO_STATE_H
diff --git a/third_party/android_prediction/suggest/core/layout/proximity_info_state_utils.cpp b/third_party/android_prediction/suggest/core/layout/proximity_info_state_utils.cpp
new file mode 100644
index 0000000..d80be62
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/proximity_info_state_utils.cpp
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_state_utils.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstring> // for memset()
+#include <sstream> // for debug prints
+#include <unordered_map>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/layout/geometry_utils.h"
+#include "third_party/android_prediction/suggest/core/layout/normal_distribution_2d.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_params.h"
+
+namespace latinime {
+
+/* static */ int ProximityInfoStateUtils::trimLastTwoTouchPoints(std::vector<int> *sampledInputXs,
+        std::vector<int> *sampledInputYs, std::vector<int> *sampledInputTimes,
+        std::vector<int> *sampledLengthCache, std::vector<int> *sampledInputIndice) {
+    const int restartIndex = *(sampledInputIndice->end() - 2);  // index kept by 2nd-to-last sample
+    for (int dropped = 0; dropped < 2; ++dropped) {  // discard the two newest samples
+        popInputData(sampledInputXs, sampledInputYs, sampledInputTimes, sampledLengthCache,
+                sampledInputIndice);
+    }
+    return restartIndex;
+}
+
+/* static */ int ProximityInfoStateUtils::updateTouchPoints(
+ const ProximityInfo *const proximityInfo, const int maxPointToKeyLength,
+ const int *const inputProximities, const int *const inputXCoordinates,
+ const int *const inputYCoordinates, const int *const times, const int *const pointerIds,
+ const int inputSize, const bool isGeometric, const int pointerId,
+ const int pushTouchPointStartIndex, std::vector<int> *sampledInputXs,
+ std::vector<int> *sampledInputYs, std::vector<int> *sampledInputTimes,
+ std::vector<int> *sampledLengthCache, std::vector<int> *sampledInputIndice) {
+ if (DEBUG_SAMPLING_POINTS) { // debug dump of the raw input
+ if (times) {
+ for (int i = 0; i < inputSize; ++i) {
+ AKLOGI("(%d) x %d, y %d, time %d",
+ i, inputXCoordinates[i], inputYCoordinates[i], times[i]);
+ }
+ }
+ }
+#ifdef DO_ASSERT_TEST
+ if (times) { // times, when given, must be non-decreasing
+ for (int i = 0; i < inputSize; ++i) {
+ if (i > 0) {
+ if (times[i] < times[i - 1]) {
+ AKLOGI("Invalid time sequence. %d, %d", times[i - 1], times[i]);
+ ASSERT(false);
+ }
+ }
+ }
+ }
+#endif
+ const bool proximityOnly = !isGeometric // NOTE: reads element 0, so inputSize > 0 is assumed
+ && (inputXCoordinates[0] < 0 || inputYCoordinates[0] < 0);
+ int lastInputIndex = pushTouchPointStartIndex;
+ for (int i = lastInputIndex; i < inputSize; ++i) { // find the last input owned by pointerId
+ const int pid = pointerIds ? pointerIds[i] : 0;
+ if (pointerId == pid) {
+ lastInputIndex = i;
+ }
+ }
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("Init ProximityInfoState: last input index = %d", lastInputIndex);
+ }
+ // Working space to save near keys distances for current, prev and prevprev input point.
+ NearKeysDistanceMap nearKeysDistances[3];
+ // These pointers are rotated after each input point is processed (see the end of the loop).
+ NearKeysDistanceMap *currentNearKeysDistances = &nearKeysDistances[0];
+ NearKeysDistanceMap *prevNearKeysDistances = &nearKeysDistances[1];
+ NearKeysDistanceMap *prevPrevNearKeysDistances = &nearKeysDistances[2];
+ // "sumAngle" is accumulated by each angle of input points. And when "sumAngle" exceeds
+ // the threshold we save that point, reset sumAngle. This aims to keep the figure of
+ // the curve.
+ float sumAngle = 0.0f;
+
+ for (int i = pushTouchPointStartIndex; i <= lastInputIndex; ++i) {
+ // Assuming pointerId == 0 if pointerIds is null.
+ const int pid = pointerIds ? pointerIds[i] : 0;
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid);
+ }
+ if (pointerId == pid) { // inputs of other pointers are skipped
+ const int c = isGeometric ?
+ NOT_A_COORDINATE : getPrimaryCodePointAt(inputProximities, i);
+ const int x = proximityOnly ? NOT_A_COORDINATE : inputXCoordinates[i];
+ const int y = proximityOnly ? NOT_A_COORDINATE : inputYCoordinates[i];
+ const int time = times ? times[i] : -1; // -1 when no timestamps were supplied
+
+ if (i > 1) { // two earlier raw points are needed to measure a turn
+ const float prevAngle = GeometryUtils::getAngle(
+ inputXCoordinates[i - 2], inputYCoordinates[i - 2],
+ inputXCoordinates[i - 1], inputYCoordinates[i - 1]);
+ const float currentAngle = GeometryUtils::getAngle(
+ inputXCoordinates[i - 1], inputYCoordinates[i - 1], x, y);
+ sumAngle += GeometryUtils::getAngleDiff(prevAngle, currentAngle);
+ }
+
+ if (pushTouchPoint(proximityInfo, maxPointToKeyLength, i, c, x, y, time,
+ isGeometric, isGeometric /* doSampling */, i == lastInputIndex,
+ sumAngle, currentNearKeysDistances, prevNearKeysDistances,
+ prevPrevNearKeysDistances, sampledInputXs, sampledInputYs, sampledInputTimes,
+ sampledLengthCache, sampledInputIndice)) {
+ // Previous point was popped: current becomes prev; the old prev map is reused.
+ NearKeysDistanceMap *tmp = prevNearKeysDistances;
+ prevNearKeysDistances = currentNearKeysDistances;
+ currentNearKeysDistances = tmp;
+ } else { // nothing popped: shift current -> prev -> prevprev and reuse the oldest map
+ NearKeysDistanceMap *tmp = prevPrevNearKeysDistances;
+ prevPrevNearKeysDistances = prevNearKeysDistances;
+ prevNearKeysDistances = currentNearKeysDistances;
+ currentNearKeysDistances = tmp;
+ sumAngle = 0.0f; // previous point was not popped: restart the angle accumulation
+ }
+ }
+ }
+ return sampledInputXs->size(); // number of sampled points after this update
+}
+
+/* static */ const int *ProximityInfoStateUtils::getProximityCodePointsAt(
+ const int *const inputProximities, const int index) {
+ return inputProximities + (index * MAX_PROXIMITY_CHARS_SIZE); // start of the slot for |index|
+}
+
+/* static */ int ProximityInfoStateUtils::getPrimaryCodePointAt(const int *const inputProximities,
+ const int index) {
+ return getProximityCodePointsAt(inputProximities, index)[0]; // first entry of that slot
+}
+
+/* static */ void ProximityInfoStateUtils::initPrimaryInputWord(const int inputSize,
+        const int *const inputProximities, int *primaryInputWord) {
+    memset(primaryInputWord, 0, MAX_WORD_LENGTH * sizeof(*primaryInputWord));  // clear every slot
+    for (int pos = 0; pos < inputSize; ++pos) {  // then copy each slot's first code point
+        primaryInputWord[pos] = inputProximities[pos * MAX_PROXIMITY_CHARS_SIZE];
+    }
+}
+
+/* static */ float ProximityInfoStateUtils::calculateSquaredDistanceFromSweetSpotCenter(
+        const ProximityInfo *const proximityInfo, const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs, const int keyIndex, const int inputIndex) {
+    const float deltaX = static_cast<float>((*sampledInputXs)[inputIndex])
+            - proximityInfo->getSweetSpotCenterXAt(keyIndex);
+    const float deltaY = static_cast<float>((*sampledInputYs)[inputIndex])
+            - proximityInfo->getSweetSpotCenterYAt(keyIndex);
+    // Squared distance between the sampled point and the sweet spot center of the key.
+    return GeometryUtils::SQUARE_FLOAT(deltaX) + GeometryUtils::SQUARE_FLOAT(deltaY);
+}
+
+/* static */ float ProximityInfoStateUtils::calculateNormalizedSquaredDistance(
+        const ProximityInfo *const proximityInfo, const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs, const int keyIndex, const int inputIndex) {
+    // A distance exists only for a real key index that has sweet spot data, and only when the
+    // sampled X value is an actual coordinate. The three conditions are tested in that order;
+    // anything else yields NOT_A_DISTANCE_FLOAT.
+    const bool computable = keyIndex != NOT_AN_INDEX
+            && proximityInfo->hasSweetSpotData(keyIndex)
+            && NOT_A_COORDINATE != (*sampledInputXs)[inputIndex];
+    if (!computable) {
+        return ProximityInfoParams::NOT_A_DISTANCE_FLOAT;
+    }
+    const float squaredDistance = calculateSquaredDistanceFromSweetSpotCenter(proximityInfo,
+            sampledInputXs, sampledInputYs, keyIndex, inputIndex);
+    // Normalize by the squared sweet spot radius of the key.
+    const float sweetSpotRadius = proximityInfo->getSweetSpotRadiiAt(keyIndex);
+    return squaredDistance / GeometryUtils::SQUARE_FLOAT(sweetSpotRadius);
+}
+
+/* static */ void ProximityInfoStateUtils::initGeometricDistanceInfos(
+        const ProximityInfo *const proximityInfo, const int sampledInputSize,
+        const int lastSavedInputSize, const bool isGeometric,
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs,
+        std::vector<float> *sampledNormalizedSquaredLengthCache) {
+    const int keyCount = proximityInfo->getKeyCount();
+    sampledNormalizedSquaredLengthCache->resize(sampledInputSize * keyCount);
+    // The cache is laid out as keyCount consecutive entries per sampled point. Only the
+    // entries of points from lastSavedInputSize onwards are written here; entries of earlier
+    // points keep whatever they held before the resize.
+    for (int point = lastSavedInputSize; point < sampledInputSize; ++point) {
+        const int rowStart = point * keyCount;
+        for (int key = 0; key < keyCount; ++key) {
+            (*sampledNormalizedSquaredLengthCache)[rowStart + key] =
+                    proximityInfo->getNormalizedSquaredDistanceFromCenterFloatG(
+                            key, (*sampledInputXs)[point], (*sampledInputYs)[point], isGeometric);
+        }
+    }
+}
+
+/* static */ void ProximityInfoStateUtils::popInputData(std::vector<int> *sampledInputXs,
+        std::vector<int> *sampledInputYs, std::vector<int> *sampledInputTimes,
+        std::vector<int> *sampledLengthCache, std::vector<int> *sampledInputIndice) {
+    std::vector<int> *const sampleVectors[] = { sampledInputXs, sampledInputYs,
+            sampledInputTimes, sampledLengthCache, sampledInputIndice };
+    for (std::vector<int> *const samples : sampleVectors) {
+        samples->pop_back();  // removes the last element from each of the five vectors
+    }
+}
+
+/* static */ float ProximityInfoStateUtils::refreshSpeedRates(const int inputSize,
+        const int *const xCoordinates, const int *const yCoordinates, const int *const times,
+        const int lastSavedInputSize, const int sampledInputSize,
+        const std::vector<int> *const sampledInputXs, const std::vector<int> *const sampledInputYs,
+        const std::vector<int> *const sampledInputTimes,
+        const std::vector<int> *const sampledLengthCache,
+        const std::vector<int> *const sampledInputIndice, std::vector<float> *sampledSpeedRates,
+        std::vector<float> *sampledDirections) {
+    // Relative speed calculation. A zero total duration must not reach the division below:
+    // the average is reported as 0, which calculateBeelineSpeedRate() rejects as unusable.
+    const int sumDuration = sampledInputTimes->back() - sampledInputTimes->front();
+    const int sumLength = sampledLengthCache->back() - sampledLengthCache->front();
+    const float averageSpeed = (sumDuration == 0) ? 0.0f
+            : static_cast<float>(sumLength) / static_cast<float>(sumDuration);
+    sampledSpeedRates->resize(sampledInputSize);  // one rate per sampled point
+    for (int i = lastSavedInputSize; i < sampledInputSize; ++i) {
+        const int index = (*sampledInputIndice)[i];  // raw-input index stored for sample i
+        int length = 0;
+        int duration = 0;
+        // Calculate velocity from the distances and durations of up to
+        // ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION raw points forward and backward.
+        const int forwardNumPoints = std::min(inputSize - 1,
+                index + ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION);
+        for (int j = index; j < forwardNumPoints; ++j) {
+            if (i < sampledInputSize - 1 && j >= (*sampledInputIndice)[i + 1]) {
+                break;
+            }
+            length += GeometryUtils::getDistanceInt(xCoordinates[j], yCoordinates[j],
+                    xCoordinates[j + 1], yCoordinates[j + 1]);
+            duration += times[j + 1] - times[j];
+        }
+        const int backwardNumPoints = std::max(0,
+                index - ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION);
+        for (int j = index - 1; j >= backwardNumPoints; --j) {
+            if (i > 0 && j < (*sampledInputIndice)[i - 1]) {
+                break;
+            }
+            // TODO: use mSampledLengthCache instead?
+            length += GeometryUtils::getDistanceInt(xCoordinates[j], yCoordinates[j],
+                    xCoordinates[j + 1], yCoordinates[j + 1]);
+            duration += times[j + 1] - times[j];
+        }
+        if (duration == 0 || averageSpeed == 0.0f) {
+            // Cannot calculate a relative speed; thus, it gives an average value (1.0);
+            (*sampledSpeedRates)[i] = 1.0f;
+        } else {
+            const float speed = static_cast<float>(length) / static_cast<float>(duration);
+            (*sampledSpeedRates)[i] = speed / averageSpeed;
+        }
+    }
+
+    // Direction calculation.
+    sampledDirections->resize(sampledInputSize - 1);
+    for (int i = std::max(0, lastSavedInputSize - 1); i < sampledInputSize - 1; ++i) {
+        (*sampledDirections)[i] = getDirection(sampledInputXs, sampledInputYs, i, i + 1);
+    }
+    return averageSpeed;
+}
+
+/* static */ void ProximityInfoStateUtils::refreshBeelineSpeedRates(const int mostCommonKeyWidth,
+ const float averageSpeed, const int inputSize, const int *const xCoordinates,
+ const int *const yCoordinates, const int *times, const int sampledInputSize,
+ const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const std::vector<int> *const inputIndice,
+ std::vector<int> *beelineSpeedPercentiles) {
+ if (DEBUG_SAMPLING_POINTS) {
+ AKLOGI("--- refresh beeline speed rates");
+ }
+ beelineSpeedPercentiles->resize(sampledInputSize); // one entry per sampled point
+ for (int i = 0; i < sampledInputSize; ++i) { // every entry is recomputed on each call
+ (*beelineSpeedPercentiles)[i] = static_cast<int>(calculateBeelineSpeedRate(
+ mostCommonKeyWidth, averageSpeed, i, inputSize, xCoordinates, yCoordinates, times,
+ sampledInputSize, sampledInputXs, sampledInputYs, inputIndice) * MAX_PERCENTILE);
+ } // the float rate is scaled by MAX_PERCENTILE and truncated to int
+}
+
+/* static */float ProximityInfoStateUtils::getDirection(
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs, const int index0, const int index1) {
+    // Angle, as computed by GeometryUtils::getAngle, from the sampled point at |index0| to the
+    // sampled point at |index1|. An index outside the sampled X vector yields 0.0f rather
+    // than an out-of-range read.
+    ASSERT(sampledInputXs && sampledInputYs);
+    const int pointCount = sampledInputXs->size();
+    const bool firstInRange = 0 <= index0 && index0 < pointCount;
+    const bool secondInRange = 0 <= index1 && index1 < pointCount;
+    if (!firstInRange || !secondInRange) {
+        return 0.0f;
+    }
+    // Arguments are (x, y) of the first point followed by (x, y) of the second point.
+    return GeometryUtils::getAngle((*sampledInputXs)[index0], (*sampledInputYs)[index0],
+            (*sampledInputXs)[index1], (*sampledInputYs)[index1]);
+}
+
+// Rebuilds |currentNearKeysDistances| with every key whose distance from (x, y), as reported
+// by getNormalizedSquaredDistanceFromCenterFloatG(), is below NEAR_KEY_THRESHOLD_FOR_DISTANCE.
+// Returns the smallest distance seen over all keys, never more than |maxPointToKeyLength|.
+/* static */ float ProximityInfoStateUtils::updateNearKeysDistances(
+        const ProximityInfo *const proximityInfo, const float maxPointToKeyLength, const int x,
+        const int y, const bool isGeometric, NearKeysDistanceMap *const currentNearKeysDistances) {
+    currentNearKeysDistances->clear();
+    const int keyCount = proximityInfo->getKeyCount();
+    float nearest = maxPointToKeyLength;
+    for (int keyId = 0; keyId < keyCount; ++keyId) {
+        const float distance = proximityInfo->getNormalizedSquaredDistanceFromCenterFloatG(
+                keyId, x, y, isGeometric);
+        if (distance < ProximityInfoParams::NEAR_KEY_THRESHOLD_FOR_DISTANCE) {
+            currentNearKeysDistances->insert(std::pair<int, float>(keyId, distance));
+        }
+        // The running minimum covers all keys, not only those recorded as near.
+        nearest = std::min(nearest, distance);
+    }
+    return nearest;
+}
+
+// Checks whether the previous point is a local distance minimum for any of its near keys.
+/* static */ bool ProximityInfoStateUtils::isPrevLocalMin(
+ const NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances) {
+ for (NearKeysDistanceMap::const_iterator it = prevNearKeysDistances->begin();
+ it != prevNearKeysDistances->end(); ++it) { // each key near the previous point
+ NearKeysDistanceMap::const_iterator itPP = prevPrevNearKeysDistances->find(it->first);
+ NearKeysDistanceMap::const_iterator itC = currentNearKeysDistances->find(it->first);
+ const bool isPrevPrevNear = (itPP == prevPrevNearKeysDistances->end() // key absent there,
+ || itPP->second > it->second + ProximityInfoParams::MARGIN_FOR_PREV_LOCAL_MIN); // or farther
+ const bool isCurrentNear = (itC == currentNearKeysDistances->end() // same test for current
+ || itC->second > it->second + ProximityInfoParams::MARGIN_FOR_PREV_LOCAL_MIN);
+ if (isPrevPrevNear && isCurrentNear) { // both neighbours are farther from this key (or absent)
+ return true;
+ }
+ }
+ return false;
+}
+
+// Scores the last sampled point against the new point (x, y); pushTouchPoint() pops it if < 0.
+/* static */ float ProximityInfoStateUtils::getPointScore(const int mostCommonKeyWidth,
+ const int x, const int y, const int time, const bool lastPoint, const float nearest,
+ const float sumAngle, const NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances,
+ std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs) {
+ const size_t size = sampledInputXs->size(); // NOTE: time and lastPoint are not used below
+ // If there is only one point, add this point. Besides, if the previous point's distance map
+ // is empty, we re-compute nearby keys distances from the current point.
+ // Note that the current point is the first point in the incremental input that needs to
+ // be re-computed.
+ if (size <= 1 || prevNearKeysDistances->empty()) {
+ return 0.0f;
+ }
+
+ const int baseSampleRate = mostCommonKeyWidth;
+ const int distPrev = GeometryUtils::getDistanceInt(sampledInputXs->back(), // last two samples
+ sampledInputYs->back(), (*sampledInputXs)[size - 2],
+ (*sampledInputYs)[size - 2]) * ProximityInfoParams::DISTANCE_BASE_SCALE;
+ float score = 0.0f;
+
+ // Location
+ if (!isPrevLocalMin(currentNearKeysDistances, prevNearKeysDistances,
+ prevPrevNearKeysDistances)) {
+ score += ProximityInfoParams::NOT_LOCALMIN_DISTANCE_SCORE;
+ } else if (nearest < ProximityInfoParams::NEAR_KEY_THRESHOLD_FOR_POINT_SCORE) {
+ // Promote points nearby keys
+ score += ProximityInfoParams::LOCALMIN_DISTANCE_AND_NEAR_TO_KEY_SCORE;
+ }
+ // Angle
+ const float angle1 = GeometryUtils::getAngle(x, y, sampledInputXs->back(), // new -> last sample
+ sampledInputYs->back());
+ const float angle2 = GeometryUtils::getAngle(sampledInputXs->back(), sampledInputYs->back(),
+ (*sampledInputXs)[size - 2], (*sampledInputYs)[size - 2]); // last -> second-to-last sample
+ const float angleDiff = GeometryUtils::getAngleDiff(angle1, angle2);
+
+ // Save corner
+ if (distPrev > baseSampleRate * ProximityInfoParams::CORNER_CHECK_DISTANCE_THRESHOLD_SCALE
+ && (sumAngle > ProximityInfoParams::CORNER_SUM_ANGLE_THRESHOLD
+ || angleDiff > ProximityInfoParams::CORNER_ANGLE_THRESHOLD_FOR_POINT_SCORE)) {
+ score += ProximityInfoParams::CORNER_SCORE;
+ }
+ return score;
+}
+
+// Samples one touch point and appends its data to the sampled vectors.
+// Returns whether the previously sampled point was popped while doing so.
+/* static */ bool ProximityInfoStateUtils::pushTouchPoint(const ProximityInfo *const proximityInfo,
+ const int maxPointToKeyLength, const int inputIndex, const int nodeCodePoint, int x, int y,
+ const int time, const bool isGeometric, const bool doSampling,
+ const bool isLastPoint, const float sumAngle,
+ NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances,
+ std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs,
+ std::vector<int> *sampledInputTimes, std::vector<int> *sampledLengthCache,
+ std::vector<int> *sampledInputIndice) {
+ const int mostCommonKeyWidth = proximityInfo->getMostCommonKeyWidth();
+
+ size_t size = sampledInputXs->size();
+ bool popped = false;
+ if (nodeCodePoint < 0 && doSampling) { // no code point given and sampling requested
+ const float nearest = updateNearKeysDistances(proximityInfo, maxPointToKeyLength, x, y,
+ isGeometric, currentNearKeysDistances); // NOTE(review): int length converts to float here
+ const float score = getPointScore(mostCommonKeyWidth, x, y, time, isLastPoint, nearest,
+ sumAngle, currentNearKeysDistances, prevNearKeysDistances,
+ prevPrevNearKeysDistances, sampledInputXs, sampledInputYs);
+ if (score < 0) {
+ // Pop previous point because it would be useless.
+ popInputData(sampledInputXs, sampledInputYs, sampledInputTimes, sampledLengthCache,
+ sampledInputIndice);
+ size = sampledInputXs->size(); // refresh: one sample fewer now
+ popped = true;
+ } else {
+ popped = false;
+ }
+ // Check if the last point should be skipped.
+ if (isLastPoint && size > 0) {
+ if (GeometryUtils::getDistanceInt(x, y, sampledInputXs->back(), sampledInputYs->back())
+ * ProximityInfoParams::LAST_POINT_SKIP_DISTANCE_SCALE < mostCommonKeyWidth) {
+ // This point is not used because it's too close to the previous point.
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("p0: size = %zd, x = %d, y = %d, lx = %d, ly = %d, dist = %d, "
+ "width = %d", size, x, y, sampledInputXs->back(),
+ sampledInputYs->back(), GeometryUtils::getDistanceInt(
+ x, y, sampledInputXs->back(), sampledInputYs->back()),
+ mostCommonKeyWidth
+ / ProximityInfoParams::LAST_POINT_SKIP_DISTANCE_SCALE);
+ }
+ return popped; // early exit: nothing is pushed for this point
+ }
+ }
+ }
+
+ if (nodeCodePoint >= 0 && (x < 0 || y < 0)) { // code point without a usable touch position
+ const int keyId = proximityInfo->getKeyIndexOf(nodeCodePoint);
+ if (keyId >= 0) { // substitute the key center reported by ProximityInfo
+ x = proximityInfo->getKeyCenterXOfKeyIdG(keyId, NOT_AN_INDEX, isGeometric);
+ y = proximityInfo->getKeyCenterYOfKeyIdG(keyId, NOT_AN_INDEX, isGeometric);
+ }
+ }
+
+ // Pushing point information.
+ if (size > 0) { // length cache is cumulative: previous total plus the step to this point
+ sampledLengthCache->push_back(
+ sampledLengthCache->back() + GeometryUtils::getDistanceInt(
+ x, y, sampledInputXs->back(), sampledInputYs->back()));
+ } else {
+ sampledLengthCache->push_back(0);
+ }
+ sampledInputXs->push_back(x);
+ sampledInputYs->push_back(y);
+ sampledInputTimes->push_back(time);
+ sampledInputIndice->push_back(inputIndex); // remembers which raw input this sample came from
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("pushTouchPoint: x = %03d, y = %03d, time = %d, index = %d, popped ? %01d",
+ x, y, time, inputIndex, popped);
+ }
+ return popped;
+}
+
+/* static */ float ProximityInfoStateUtils::calculateBeelineSpeedRate(const int mostCommonKeyWidth,
+ const float averageSpeed, const int id, const int inputSize, const int *const xCoordinates,
+ const int *const yCoordinates, const int *times, const int sampledInputSize,
+ const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<int> *const sampledInputIndices) {
+ if (sampledInputSize <= 0 || averageSpeed < 0.001f) { // no usable average speed to compare with
+ if (DEBUG_SAMPLING_POINTS) {
+ AKLOGI("--- invalid state: cancel. size = %d, ave = %f",
+ sampledInputSize, averageSpeed);
+ }
+ return 1.0f;
+ }
+ const int lookupRadius = mostCommonKeyWidth
+ * ProximityInfoParams::LOOKUP_RADIUS_PERCENTILE / MAX_PERCENTILE;
+ const int x0 = (*sampledInputXs)[id];
+ const int y0 = (*sampledInputYs)[id];
+ const int actualInputIndex = (*sampledInputIndices)[id]; // raw-input index of sampled point id
+ int tempTime = 0; // NOTE: accumulated in both loops below but never read
+ int tempBeelineDistance = 0;
+ int start = actualInputIndex;
+ // Look back through earlier raw points until one is lookupRadius away or the input starts.
+ while (start > 0 && tempBeelineDistance < lookupRadius) {
+ tempTime += times[start] - times[start - 1];
+ --start;
+ tempBeelineDistance = GeometryUtils::getDistanceInt(x0, y0, xCoordinates[start],
+ yCoordinates[start]);
+ }
+ // Exclusive unless this is an edge point
+ if (start > 0 && start < actualInputIndex) {
+ ++start;
+ }
+ tempTime= 0;
+ tempBeelineDistance = 0;
+ int end = actualInputIndex;
+ // Look ahead through later raw points until one is lookupRadius away or the input ends.
+ while (end < (inputSize - 1) && tempBeelineDistance < lookupRadius) {
+ tempTime += times[end + 1] - times[end];
+ ++end;
+ tempBeelineDistance = GeometryUtils::getDistanceInt(x0, y0, xCoordinates[end],
+ yCoordinates[end]);
+ }
+ // Exclusive unless this is an edge point
+ if (end > actualInputIndex && end < (inputSize - 1)) {
+ --end;
+ }
+
+ if (start >= end) { // empty window: nothing to measure
+ if (DEBUG_DOUBLE_LETTER) {
+ AKLOGI("--- double letter: start == end %d", start);
+ }
+ return 1.0f;
+ }
+
+ const int x2 = xCoordinates[start];
+ const int y2 = yCoordinates[start];
+ const int x3 = xCoordinates[end];
+ const int y3 = yCoordinates[end];
+ const int beelineDistance = GeometryUtils::getDistanceInt(x2, y2, x3, y3); // window endpoints
+ int adjustedStartTime = times[start];
+ if (start == 0 && actualInputIndex == 0 && inputSize > 1) { // very first point of the input
+ adjustedStartTime += ProximityInfoParams::FIRST_POINT_TIME_OFFSET_MILLIS;
+ }
+ int adjustedEndTime = times[end];
+ if (end == (inputSize - 1) && inputSize > 1) { // window reaches the last point of the input
+ adjustedEndTime -= ProximityInfoParams::FIRST_POINT_TIME_OFFSET_MILLIS;
+ }
+ const int time = adjustedEndTime - adjustedStartTime;
+ if (time <= 0) {
+ return 1.0f;
+ }
+
+ if (time >= ProximityInfoParams::STRONG_DOUBLE_LETTER_TIME_MILLIS){
+ return 0.0f; // percentile 0 is what getDoubleLetterLevel() maps to A_STRONG_DOUBLE_LETTER
+ }
+ if (DEBUG_DOUBLE_LETTER) {
+ AKLOGI("--- (%d, %d) double letter: start = %d, end = %d, dist = %d, time = %d,"
+ " speed = %f, ave = %f, val = %f, start time = %d, end time = %d",
+ id, (*sampledInputIndices)[id], start, end, beelineDistance, time,
+ (static_cast<float>(beelineDistance) / static_cast<float>(time)), averageSpeed,
+ ((static_cast<float>(beelineDistance) / static_cast<float>(time))
+ / averageSpeed), adjustedStartTime, adjustedEndTime);
+ }
+ // Offset 1%
+ // TODO: Detect double letter more smartly
+ return 0.01f + static_cast<float>(beelineDistance) / static_cast<float>(time) / averageSpeed;
+}
+
+// Returns the change of direction at sampled point `index`: the GeometryUtils::getAngleDiff()
+// between getDirection(index - 1, index) and getDirection(index, index + 1).
+// Returns 0.0f when either coordinate vector is null, or when `index` is the first point, the
+// last point, or out of range (such a point has no neighbor on both sides).
+/* static */ float ProximityInfoStateUtils::getPointAngle(
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs, const int index) {
+    const bool hasCoordinates = sampledInputXs && sampledInputYs;
+    if (!hasCoordinates) {
+        return 0.0f;
+    }
+    const int pointCount = sampledInputXs->size();
+    const bool isInteriorPoint = index > 0 && index < pointCount - 1;
+    if (!isInteriorPoint) {
+        return 0.0f;
+    }
+    const float incoming = getDirection(sampledInputXs, sampledInputYs, index - 1, index);
+    const float outgoing = getDirection(sampledInputXs, sampledInputYs, index, index + 1);
+    return GeometryUtils::getAngleDiff(incoming, outgoing);
+}
+
+// Returns the GeometryUtils::getAngleDiff() between getDirection(index0, index1) and
+// getDirection(index1, index2) for three arbitrary sampled points.
+// Returns 0.0f when either coordinate vector is null or when any of the three indices lies
+// outside [0, sampledInputXs->size()).
+/* static */ float ProximityInfoStateUtils::getPointsAngle(
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs,
+        const int index0, const int index1, const int index2) {
+    if (!sampledInputXs || !sampledInputYs) {
+        return 0.0f;
+    }
+    const int pointCount = sampledInputXs->size();
+    const auto isValidIndex = [pointCount](const int candidate) {
+        return candidate >= 0 && candidate <= pointCount - 1;
+    };
+    if (!isValidIndex(index0) || !isValidIndex(index1) || !isValidIndex(index2)) {
+        return 0.0f;
+    }
+    const float firstLeg = getDirection(sampledInputXs, sampledInputYs, index0, index1);
+    const float secondLeg = getDirection(sampledInputXs, sampledInputYs, index1, index2);
+    return GeometryUtils::getAngleDiff(firstLeg, secondLeg);
+}
+
+// Returns the cached value for the pair (sampled point `inputIndex`, key `keyId`), capped at
+// maxPointToKeyLength. The cache is addressed as inputIndex * keyCount + keyId.
+// When keyId is NOT_AN_INDEX (the char is not a key on the keyboard) the max length
+// MAX_VALUE_FOR_WEIGHTING is returned instead and the cache is not touched.
+// Note: this function basically converts from a length to an edit distance. Accordingly, it's
+// obviously wrong to compare with mMaxPointToKeyLength.
+/* static */ float ProximityInfoStateUtils::getPointToKeyByIdLength(const float maxPointToKeyLength,
+        const std::vector<float> *const sampledNormalizedSquaredLengthCache, const int keyCount,
+        const int inputIndex, const int keyId) {
+    if (keyId == NOT_AN_INDEX) {
+        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+    }
+    const std::vector<float> &lengthCache = *sampledNormalizedSquaredLengthCache;
+    const int cacheSlot = inputIndex * keyCount + keyId;
+    return std::min(lengthCache[cacheSlot], maxPointToKeyLength);
+}
+
+// Updates probabilities of aligning to some keys and skipping.
+// Word suggestion should be based on these probabilities.
+//
+// charProbabilities is resized to sampledInputSize, and for every sampled point i in
+// [start, sampledInputSize) the map (*charProbabilities)[i] is rebuilt: it maps a key index to
+// a value, and the NOT_AN_INDEX entry holds the value for skipping the point.
+// Steps performed below:
+//   1. Compute the skip probability of each point from its speed rate, its angle and the
+//      distance to the nearest key, then split the rest (1.0f - skipProbability) among all keys
+//      in proportion to the density of a NormalDistribution2D built from the point.
+//   2. Call suppressCharProbabilities() for pairs of nearby points.
+//   3. Replace every remaining value by its negative logarithm (-logf) and erase key entries
+//      whose probability is below ProximityInfoParams::MIN_PROBABILITY.
+/* static */ void ProximityInfoStateUtils::updateAlignPointProbabilities(
+ const float maxPointToKeyLength, const int mostCommonKeyWidth, const int keyCount,
+ const int start, const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<float> *const sampledSpeedRates,
+ const std::vector<int> *const sampledLengthCache,
+ const std::vector<float> *const sampledNormalizedSquaredLengthCache,
+ const ProximityInfo *const proximityInfo,
+ std::vector<std::unordered_map<int, float>> *charProbabilities) {
+ charProbabilities->resize(sampledInputSize);
+ // Calculates probabilities of using a point as a correlated point with the character
+ // for each point.
+ for (int i = start; i < sampledInputSize; ++i) {
+ (*charProbabilities)[i].clear();
+ // First, calculates skip probability. Starts from MAX_SKIP_PROBABILITY.
+ // Note that all values that are multiplied to this probability should be in [0.0, 1.0];
+ float skipProbability = ProximityInfoParams::MAX_SKIP_PROBABILITY;
+
+ const float currentAngle = getPointAngle(sampledInputXs, sampledInputYs, i);
+ const float speedRate = (*sampledSpeedRates)[i];
+
+ // Smallest getPointToKeyByIdLength() value over all keys for this point.
+ float nearestKeyDistance = static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+ for (int j = 0; j < keyCount; ++j) {
+ const float distance = getPointToKeyByIdLength(
+ maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, i, j);
+ if (distance < nearestKeyDistance) {
+ nearestKeyDistance = distance;
+ }
+ }
+
+ if (i == 0) {
+ skipProbability *= std::min(1.0f,
+ nearestKeyDistance * ProximityInfoParams::NEAREST_DISTANCE_WEIGHT
+ + ProximityInfoParams::NEAREST_DISTANCE_BIAS);
+ // Promote the first point
+ skipProbability *= ProximityInfoParams::SKIP_FIRST_POINT_PROBABILITY;
+ } else if (i == sampledInputSize - 1) {
+ skipProbability *= std::min(1.0f,
+ nearestKeyDistance * ProximityInfoParams::NEAREST_DISTANCE_WEIGHT_FOR_LAST
+ + ProximityInfoParams::NEAREST_DISTANCE_BIAS_FOR_LAST);
+ // Promote the last point
+ skipProbability *= ProximityInfoParams::SKIP_LAST_POINT_PROBABILITY;
+ } else {
+ // If the current speed is relatively slower than adjacent keys, we promote this point.
+ if ((*sampledSpeedRates)[i - 1] - ProximityInfoParams::SPEED_MARGIN > speedRate
+ && speedRate
+ < (*sampledSpeedRates)[i + 1] - ProximityInfoParams::SPEED_MARGIN) {
+ if (currentAngle < ProximityInfoParams::CORNER_ANGLE_THRESHOLD) {
+ skipProbability *= std::min(1.0f, speedRate
+ * ProximityInfoParams::SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY);
+ } else {
+ // If the angle is small enough, we promote this point more. (e.g. pit vs put)
+ // NOTE(review): this branch runs when currentAngle is NOT below
+ // CORNER_ANGLE_THRESHOLD; confirm which angle "small enough" refers to.
+ skipProbability *= std::min(1.0f,
+ speedRate * ProximityInfoParams::SPEED_WEIGHT_FOR_SKIP_PROBABILITY
+ + ProximityInfoParams::MIN_SPEED_RATE_FOR_SKIP_PROBABILITY);
+ }
+ }
+
+ skipProbability *= std::min(1.0f,
+ speedRate * nearestKeyDistance * ProximityInfoParams::NEAREST_DISTANCE_WEIGHT
+ + ProximityInfoParams::NEAREST_DISTANCE_BIAS);
+
+ // Adjusts skip probability by a rate depending on angle.
+ // ANGLE_RATE of skipProbability is adjusted by current angle.
+ skipProbability *= (M_PI_F - currentAngle) / M_PI_F * ProximityInfoParams::ANGLE_WEIGHT
+ + (1.0f - ProximityInfoParams::ANGLE_WEIGHT);
+ if (currentAngle > ProximityInfoParams::DEEP_CORNER_ANGLE_THRESHOLD) {
+ skipProbability *= ProximityInfoParams::SKIP_DEEP_CORNER_PROBABILITY;
+ }
+ // We assume the angle of this point is the angle for point[i], point[i - 2]
+ // and point[i - 3]. The reason why we don't use the angle for point[i], point[i - 1]
+ // and point[i - 2] is this angle can be more affected by the noise.
+ // For i < 3 the indices are negative; getPointsAngle() then returns 0.0f and the
+ // "i >= 3" test below keeps that value from being used.
+ const float prevAngle = getPointsAngle(sampledInputXs, sampledInputYs, i, i - 2, i - 3);
+ if (i >= 3 && prevAngle < ProximityInfoParams::STRAIGHT_ANGLE_THRESHOLD
+ && currentAngle > ProximityInfoParams::CORNER_ANGLE_THRESHOLD) {
+ skipProbability *= ProximityInfoParams::SKIP_CORNER_PROBABILITY;
+ }
+ }
+
+ // probabilities must be in [0.0, ProximityInfoParams::MAX_SKIP_PROBABILITY];
+ ASSERT(skipProbability >= 0.0f);
+ ASSERT(skipProbability <= ProximityInfoParams::MAX_SKIP_PROBABILITY);
+ (*charProbabilities)[i][NOT_AN_INDEX] = skipProbability;
+
+ // Second, calculates key probabilities by dividing the rest probability
+ // (1.0f - skipProbability).
+ const float inputCharProbability = 1.0f - skipProbability;
+
+ // Both rates below are capped by their MAX_..._FOR_STANDARD_DEVIATION constant; their sum
+ // plus MIN_STANDARD_DEVIATION, scaled by the key width, gives the base sigma.
+ const float speedMultipliedByAngleRate = std::min(speedRate * currentAngle / M_PI_F
+ * ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION,
+ ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION);
+ const float speedMultipliedByNearestKeyDistanceRate = std::min(
+ speedRate * nearestKeyDistance
+ * ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION,
+ ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION);
+ const float sigma = (speedMultipliedByAngleRate + speedMultipliedByNearestKeyDistanceRate
+ + ProximityInfoParams::MIN_STANDARD_DEVIATION) * mostCommonKeyWidth;
+ float theta = 0.0f;
+ // TODO: Use different metrics to compute sigmas.
+ float sigmaX = sigma;
+ float sigmaY = sigma;
+ if (i == 0 && i != sampledInputSize - 1) {
+ // First point
+ theta = getDirection(sampledInputXs, sampledInputYs, i + 1, i);
+ sigmaX *= ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_FIRST;
+ sigmaY *= ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_FIRST;
+ } else {
+ if (i == sampledInputSize - 1) {
+ // Last point
+ sigmaX *= ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_LAST;
+ sigmaY *= ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_LAST;
+ } else {
+ sigmaX *= ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT;
+ sigmaY *= ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT;
+ }
+ theta = getDirection(sampledInputXs, sampledInputYs, i, i - 1);
+ }
+ NormalDistribution2D distribution((*sampledInputXs)[i], sigmaX, (*sampledInputYs)[i],
+ sigmaY, theta);
+ // Summing up probability densities of all near keys.
+ float sumOfProbabilityDensities = 0.0f;
+ for (int j = 0; j < keyCount; ++j) {
+ sumOfProbabilityDensities += distribution.getProbabilityDensity(
+ proximityInfo->getKeyCenterXOfKeyIdG(j,
+ NOT_A_COORDINATE /* referencePointX */, true /* isGeometric */),
+ proximityInfo->getKeyCenterYOfKeyIdG(j,
+ NOT_A_COORDINATE /* referencePointY */, true /* isGeometric */));
+ }
+
+ // Split the probability of an input point to keys that are close to the input point.
+ // NOTE(review): sumOfProbabilityDensities is used as a divisor without a zero check.
+ for (int j = 0; j < keyCount; ++j) {
+ const float probabilityDensity = distribution.getProbabilityDensity(
+ proximityInfo->getKeyCenterXOfKeyIdG(j,
+ NOT_A_COORDINATE /* referencePointX */, true /* isGeometric */),
+ proximityInfo->getKeyCenterYOfKeyIdG(j,
+ NOT_A_COORDINATE /* referencePointY */, true /* isGeometric */));
+ const float probability = inputCharProbability * probabilityDensity
+ / sumOfProbabilityDensities;
+ (*charProbabilities)[i][j] = probability;
+ }
+ }
+
+ // Debug-only listing of every point with its raw (not yet suppressed) probabilities.
+ if (DEBUG_POINTS_PROBABILITY) {
+ for (int i = 0; i < sampledInputSize; ++i) {
+ std::stringstream sstream;
+ sstream << i << ", ";
+ sstream << "(" << (*sampledInputXs)[i] << ", " << (*sampledInputYs)[i] << "), ";
+ sstream << "Speed: "<< (*sampledSpeedRates)[i] << ", ";
+ sstream << "Angle: "<< getPointAngle(sampledInputXs, sampledInputYs, i) << ", \n";
+
+ for (std::unordered_map<int, float>::iterator it = (*charProbabilities)[i].begin();
+ it != (*charProbabilities)[i].end(); ++it) {
+ if (it->first == NOT_AN_INDEX) {
+ sstream << it->first
+ << "(skip):"
+ << it->second
+ << "\n";
+ } else {
+ sstream << it->first
+ << "("
+ //<< static_cast<char>(mProximityInfo->getCodePointOf(it->first))
+ << "):"
+ << it->second
+ << "\n";
+ }
+ }
+ AKLOGI("%s", sstream.str().c_str());
+ }
+ }
+
+ // Decrease key probabilities of points which don't have the highest probability of that key
+ // among nearby points. Probabilities of the first point and the last point are not suppressed.
+ // Each scan (forward, then backward) stops at the first neighbor for which
+ // suppressCharProbabilities() returns false.
+ for (int i = std::max(start, 1); i < sampledInputSize; ++i) {
+ for (int j = i + 1; j < sampledInputSize; ++j) {
+ if (!suppressCharProbabilities(
+ mostCommonKeyWidth, sampledInputSize, sampledLengthCache, i, j,
+ charProbabilities)) {
+ break;
+ }
+ }
+ for (int j = i - 1; j >= std::max(start, 0); --j) {
+ if (!suppressCharProbabilities(
+ mostCommonKeyWidth, sampledInputSize, sampledLengthCache, i, j,
+ charProbabilities)) {
+ break;
+ }
+ }
+ }
+
+ // Converting from raw probabilities to log probabilities to calculate spatial distance.
+ for (int i = start; i < sampledInputSize; ++i) {
+ for (int j = 0; j < keyCount; ++j) {
+ std::unordered_map<int, float>::iterator it = (*charProbabilities)[i].find(j);
+ if (it == (*charProbabilities)[i].end()){
+ continue;
+ } else if(it->second < ProximityInfoParams::MIN_PROBABILITY) {
+ // Erases from near keys vector because it has very low probability.
+ (*charProbabilities)[i].erase(j);
+ } else {
+ it->second = -logf(it->second);
+ }
+ }
+ // The skip entry is always kept and converted, whatever its value.
+ (*charProbabilities)[i][NOT_AN_INDEX] = -logf((*charProbabilities)[i][NOT_AN_INDEX]);
+ }
+}
+
+// Rebuilds, for every sampled point, the set of keys to search and the matching list of code
+// points.
+// For point i the set collects every key index present in charProbabilities of the points j
+// (starting at max(i, lastSavedInputSize)) whose cached length is less than a read-ahead length
+// away from point i. Sets of points before lastSavedInputSize are not cleared first.
+// (*sampledSearchKeyVectors)[i] is then refilled with the distinct code points of the keys in
+// the set, in increasing key index order.
+/* static */ void ProximityInfoStateUtils::updateSampledSearchKeySets(
+        const ProximityInfo *const proximityInfo, const int sampledInputSize,
+        const int lastSavedInputSize, const std::vector<int> *const sampledLengthCache,
+        const std::vector<std::unordered_map<int, float>> *const charProbabilities,
+        std::vector<NearKeycodesSet> *sampledSearchKeySets,
+        std::vector<std::vector<int>> *sampledSearchKeyVectors) {
+    sampledSearchKeySets->resize(sampledInputSize);
+    sampledSearchKeyVectors->resize(sampledInputSize);
+    // Read-ahead length: a fixed ratio of the keyboard diagonal.
+    const int readAheadLength = static_cast<int>(
+            hypotf(proximityInfo->getKeyboardWidth(), proximityInfo->getKeyboardHeight())
+                    * ProximityInfoParams::SEARCH_KEY_RADIUS_RATIO);
+    for (int pointIndex = 0; pointIndex < sampledInputSize; ++pointIndex) {
+        NearKeycodesSet &keySet = (*sampledSearchKeySets)[pointIndex];
+        if (pointIndex >= lastSavedInputSize) {
+            keySet.reset();
+        }
+        for (int ahead = std::max(pointIndex, lastSavedInputSize); ahead < sampledInputSize;
+                ++ahead) {
+            // TODO: Investigate if this is required. This may not fail.
+            const int lengthGap = (*sampledLengthCache)[ahead] - (*sampledLengthCache)[pointIndex];
+            if (lengthGap >= readAheadLength) {
+                break;
+            }
+            for (const auto &entry : charProbabilities->at(ahead)) {
+                // The skip entry does not correspond to a key.
+                if (entry.first != NOT_AN_INDEX) {
+                    keySet.set(entry.first);
+                }
+            }
+        }
+    }
+    const int keyCount = proximityInfo->getKeyCount();
+    for (int pointIndex = 0; pointIndex < sampledInputSize; ++pointIndex) {
+        const NearKeycodesSet &keySet = (*sampledSearchKeySets)[pointIndex];
+        std::vector<int> &codePoints = (*sampledSearchKeyVectors)[pointIndex];
+        codePoints.clear();
+        for (int keyIndex = 0; keyIndex < keyCount; ++keyIndex) {
+            if (!keySet.test(keyIndex)) {
+                continue;
+            }
+            const int keyCodePoint = proximityInfo->getCodePointOf(keyIndex);
+            const bool alreadyListed = std::find(codePoints.begin(), codePoints.end(),
+                    keyCodePoint) != codePoints.end();
+            if (!alreadyListed) {
+                codePoints.push_back(keyCodePoint);
+            }
+        }
+    }
+}
+
+// Decreases char probabilities of index0 by checking probabilities of a near point (index1) and
+// increases char probabilities of index1 by checking probabilities of index0.
+// For every entry of point index0 that point index1 also has with a larger value, the value at
+// index0 is scaled down by a rate that depends on the cached length gap between the two points;
+// the removed amount is added to index0's NOT_AN_INDEX (skip) entry, and part of it is moved
+// from index1's skip entry to index1's entry for the same key.
+// Returns false, without changing anything, when the two points are further apart than
+// mostCommonKeyWidth * SUPPRESSION_LENGTH_WEIGHT; returns true otherwise.
+/* static */ bool ProximityInfoStateUtils::suppressCharProbabilities(const int mostCommonKeyWidth,
+        const int sampledInputSize, const std::vector<int> *const lengthCache,
+        const int index0, const int index1,
+        std::vector<std::unordered_map<int, float>> *charProbabilities) {
+    ASSERT(0 <= index0 && index0 < sampledInputSize);
+    ASSERT(0 <= index1 && index1 < sampledInputSize);
+    const float keyWidth = static_cast<float>(mostCommonKeyWidth);
+    const float lengthGap =
+            fabsf(static_cast<float>((*lengthCache)[index0] - (*lengthCache)[index1]));
+    const bool tooFarApart = lengthGap > keyWidth * ProximityInfoParams::SUPPRESSION_LENGTH_WEIGHT;
+    if (tooFarApart) {
+        return false;
+    }
+    const float suppressionRate = ProximityInfoParams::MIN_SUPPRESSION_RATE
+            + lengthGap / keyWidth / ProximityInfoParams::SUPPRESSION_LENGTH_WEIGHT
+                    * ProximityInfoParams::SUPPRESSION_WEIGHT;
+    std::unordered_map<int, float> &suppressed = (*charProbabilities)[index0];
+    std::unordered_map<int, float> &promoted = (*charProbabilities)[index1];
+    for (auto own = suppressed.begin(); own != suppressed.end(); ++own) {
+        const auto other = promoted.find(own->first);
+        const bool otherIsStronger = other != promoted.end() && own->second < other->second;
+        if (!otherIsStronger) {
+            continue;
+        }
+        const float newProbability = own->second * suppressionRate;
+        const float suppression = own->second - newProbability;
+        own->second = newProbability;
+        // The NOT_AN_INDEX entry is the probability of skipping this point.
+        suppressed[NOT_AN_INDEX] += suppression;
+
+        // Add the probability of the same key nearby index1
+        const float probabilityGain = std::min(suppression
+                * ProximityInfoParams::SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN,
+                promoted[NOT_AN_INDEX]
+                        * ProximityInfoParams::SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN);
+        other->second += probabilityGain;
+        promoted[NOT_AN_INDEX] -= probabilityGain;
+    }
+    return true;
+}
+
+// Returns true when every already-sampled point still agrees with the raw input: the raw input
+// must have at least sampledInputSize points, and for each sampled point i the raw point at
+// (*sampledInputIndices)[i] must exist and have the same x and y, and the same time unless
+// `times` is null (in which case times are not compared).
+/* static */ bool ProximityInfoStateUtils::checkAndReturnIsContinuousSuggestionPossible(
+        const int inputSize, const int *const xCoordinates, const int *const yCoordinates,
+        const int *const times, const int sampledInputSize,
+        const std::vector<int> *const sampledInputXs, const std::vector<int> *const sampledInputYs,
+        const std::vector<int> *const sampledTimes,
+        const std::vector<int> *const sampledInputIndices) {
+    if (inputSize < sampledInputSize) {
+        return false;
+    }
+    for (int sampled = 0; sampled < sampledInputSize; ++sampled) {
+        const int rawIndex = (*sampledInputIndices)[sampled];
+        if (rawIndex >= inputSize) {
+            return false;
+        }
+        const bool samePosition = xCoordinates[rawIndex] == (*sampledInputXs)[sampled]
+                && yCoordinates[rawIndex] == (*sampledInputYs)[sampled];
+        if (!samePosition) {
+            return false;
+        }
+        if (times && times[rawIndex] != (*sampledTimes)[sampled]) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Traces the most probable string through the sampled points and writes its code points,
+// zero-terminated, into codePointBuf (which is cleared for MAX_WORD_LENGTH elements first).
+// For each point the entry of charProbabilities with the smallest value is chosen, where key
+// entries (everything but NOT_AN_INDEX) are first demoted by adding
+// ProximityInfoParams::DEMOTION_LOG_PROBABILITY. A chosen key appends its code point; a chosen
+// NOT_AN_INDEX appends nothing. Returns the sum of the chosen values.
+// If a chosen key has no code point, the string is made empty and tracing stops.
+/* static */ float ProximityInfoStateUtils::getMostProbableString(
+        const ProximityInfo *const proximityInfo, const int sampledInputSize,
+        const std::vector<std::unordered_map<int, float>> *const charProbabilities,
+        int *const codePointBuf) {
+    ASSERT(sampledInputSize >= 0);
+    memset(codePointBuf, 0, sizeof(codePointBuf[0]) * MAX_WORD_LENGTH);
+    int length = 0;
+    float totalLogProbability = 0.0f;
+    // TODO: Current implementation is greedy algorithm. DP would be efficient for many cases.
+    for (int point = 0; point < sampledInputSize && length < MAX_WORD_LENGTH - 1; ++point) {
+        float bestLogProbability = static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+        int bestKey = NOT_AN_INDEX;
+        for (const auto &candidate : (*charProbabilities)[point]) {
+            const bool isSkip = candidate.first == NOT_AN_INDEX;
+            const float logProbability = isSkip
+                    ? candidate.second
+                    : candidate.second + ProximityInfoParams::DEMOTION_LOG_PROBABILITY;
+            if (logProbability < bestLogProbability) {
+                bestLogProbability = logProbability;
+                bestKey = candidate.first;
+            }
+        }
+        if (bestKey != NOT_AN_INDEX) {
+            const int codePoint = proximityInfo->getCodePointOf(bestKey);
+            if (codePoint == NOT_A_CODE_POINT) {
+                AKLOGE("Key index(%d) is not found. Cannot construct most probable string",
+                        bestKey);
+                ASSERT(false);
+                // Make the length zero, which means most probable string won't be used.
+                length = 0;
+                break;
+            }
+            codePointBuf[length] = codePoint;
+            ++length;
+        }
+        totalLogProbability += bestLogProbability;
+    }
+    codePointBuf[length] = '\0';
+    return totalLogProbability;
+}
+
+// Logs the raw input points and the sampled points through AKLOGI, for debugging.
+//  - With DEBUG_GEO_FULL, each sampled point is logged with its time.
+//  - With isGeometric, each sampled point is additionally logged with its relative speed and
+//    beeline speed.
+//  - Finally the raw and the sampled coordinates are logged as ';'-separated lists.
+// sampledTimes may be null; a time of -1 is logged in that case. (The original only guarded
+// the DEBUG_GEO_FULL loop and dereferenced sampledTimes unconditionally in the geometric one.)
+/* static */ void ProximityInfoStateUtils::dump(const bool isGeometric, const int inputSize,
+        const int *const inputXCoordinates, const int *const inputYCoordinates,
+        const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs,
+        const std::vector<int> *const sampledTimes,
+        const std::vector<float> *const sampledSpeedRates,
+        const std::vector<int> *const sampledBeelineSpeedPercentiles) {
+    if (DEBUG_GEO_FULL) {
+        for (int i = 0; i < sampledInputSize; ++i) {
+            AKLOGI("Sampled(%d): x = %d, y = %d, time = %d", i, (*sampledInputXs)[i],
+                    (*sampledInputYs)[i], sampledTimes ? (*sampledTimes)[i] : -1);
+        }
+    }
+
+    std::stringstream originalX, originalY, sampledX, sampledY;
+    for (int i = 0; i < inputSize; ++i) {
+        originalX << inputXCoordinates[i];
+        originalY << inputYCoordinates[i];
+        // No separator after the last element.
+        if (i != inputSize - 1) {
+            originalX << ";";
+            originalY << ";";
+        }
+    }
+    AKLOGI("===== sampled points =====");
+    for (int i = 0; i < sampledInputSize; ++i) {
+        if (isGeometric) {
+            // Same null handling for sampledTimes as in the DEBUG_GEO_FULL loop above.
+            AKLOGI("%d: x = %d, y = %d, time = %d, relative speed = %.4f, beeline speed = %d",
+                    i, (*sampledInputXs)[i], (*sampledInputYs)[i],
+                    sampledTimes ? (*sampledTimes)[i] : -1,
+                    (*sampledSpeedRates)[i], (*sampledBeelineSpeedPercentiles)[i]);
+        }
+        sampledX << (*sampledInputXs)[i];
+        sampledY << (*sampledInputYs)[i];
+        if (i != sampledInputSize - 1) {
+            sampledX << ";";
+            sampledY << ";";
+        }
+    }
+    AKLOGI("original points:\n%s, %s,\nsampled points:\n%s, %s,\n",
+            originalX.str().c_str(), originalY.str().c_str(), sampledX.str().c_str(),
+            sampledY.str().c_str());
+}
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/layout/proximity_info_state_utils.h b/third_party/android_prediction/suggest/core/layout/proximity_info_state_utils.h
new file mode 100644
index 0000000..29946ae
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/proximity_info_state_utils.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROXIMITY_INFO_STATE_UTILS_H
+#define LATINIME_PROXIMITY_INFO_STATE_UTILS_H
+
+#include <bitset>
+#include <unordered_map>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+class ProximityInfo;
+class ProximityInfoParams;
+
+// Collection of static helper functions that operate on the sampled-input vectors (xs, ys,
+// times, length cache, indices, ...) passed in by the caller. Every member declared here is
+// static; the class keeps no data of its own.
+class ProximityInfoStateUtils {
+ public:
+ // Map with int keys and float values. Presumably key id -> distance, judging by the name;
+ // TODO confirm against the implementation.
+ typedef std::unordered_map<int, float> NearKeysDistanceMap;
+ // One bit per key index, for up to MAX_KEY_COUNT_IN_A_KEYBOARD keys.
+ typedef std::bitset<MAX_KEY_COUNT_IN_A_KEYBOARD> NearKeycodesSet;
+
+ static int trimLastTwoTouchPoints(std::vector<int> *sampledInputXs,
+ std::vector<int> *sampledInputYs, std::vector<int> *sampledInputTimes,
+ std::vector<int> *sampledLengthCache, std::vector<int> *sampledInputIndice);
+ static int updateTouchPoints(const ProximityInfo *const proximityInfo,
+ const int maxPointToKeyLength, const int *const inputProximities,
+ const int *const inputXCoordinates, const int *const inputYCoordinates,
+ const int *const times, const int *const pointerIds, const int inputSize,
+ const bool isGeometric, const int pointerId, const int pushTouchPointStartIndex,
+ std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs,
+ std::vector<int> *sampledInputTimes, std::vector<int> *sampledLengthCache,
+ std::vector<int> *sampledInputIndice);
+ static const int *getProximityCodePointsAt(const int *const inputProximities, const int index);
+ static int getPrimaryCodePointAt(const int *const inputProximities, const int index);
+ static void popInputData(std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs,
+ std::vector<int> *sampledInputTimes, std::vector<int> *sampledLengthCache,
+ std::vector<int> *sampledInputIndice);
+ static float refreshSpeedRates(const int inputSize, const int *const xCoordinates,
+ const int *const yCoordinates, const int *const times, const int lastSavedInputSize,
+ const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<int> *const sampledInputTimes,
+ const std::vector<int> *const sampledLengthCache,
+ const std::vector<int> *const sampledInputIndice,
+ std::vector<float> *sampledSpeedRates, std::vector<float> *sampledDirections);
+ static void refreshBeelineSpeedRates(const int mostCommonKeyWidth, const float averageSpeed,
+ const int inputSize, const int *const xCoordinates, const int *const yCoordinates,
+ const int *times, const int sampledInputSize,
+ const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const std::vector<int> *const inputIndice,
+ std::vector<int> *beelineSpeedPercentiles);
+ static float getDirection(const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const int index0, const int index1);
+ // Rebuilds (*charProbabilities)[i] for every sampled point i in [start, sampledInputSize):
+ // a map from key index to -log(probability of aligning the point to that key), plus a
+ // NOT_AN_INDEX entry holding -log(probability of skipping the point).
+ static void updateAlignPointProbabilities(const float maxPointToKeyLength,
+ const int mostCommonKeyWidth, const int keyCount, const int start,
+ const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<float> *const sampledSpeedRates,
+ const std::vector<int> *const sampledLengthCache,
+ const std::vector<float> *const sampledNormalizedSquaredLengthCache,
+ const ProximityInfo *const proximityInfo,
+ std::vector<std::unordered_map<int, float>> *charProbabilities);
+ // Rebuilds, per sampled point, the bitset of key indices to search and the list of their
+ // distinct code points, from the keys present in charProbabilities of nearby points.
+ static void updateSampledSearchKeySets(const ProximityInfo *const proximityInfo,
+ const int sampledInputSize, const int lastSavedInputSize,
+ const std::vector<int> *const sampledLengthCache,
+ const std::vector<std::unordered_map<int, float>> *const charProbabilities,
+ std::vector<NearKeycodesSet> *sampledSearchKeySets,
+ std::vector<std::vector<int>> *sampledSearchKeyVectors);
+ // Returns the cached value at inputIndex * keyCount + keyId, capped at maxPointToKeyLength,
+ // or MAX_VALUE_FOR_WEIGHTING when keyId is NOT_AN_INDEX.
+ static float getPointToKeyByIdLength(const float maxPointToKeyLength,
+ const std::vector<float> *const sampledNormalizedSquaredLengthCache, const int keyCount,
+ const int inputIndex, const int keyId);
+ static void initGeometricDistanceInfos(const ProximityInfo *const proximityInfo,
+ const int sampledInputSize, const int lastSavedInputSize, const bool isGeometric,
+ const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ std::vector<float> *sampledNormalizedSquaredLengthCache);
+ static void initPrimaryInputWord(const int inputSize, const int *const inputProximities,
+ int *primaryInputWord);
+ // Logs the raw input points and the sampled points (debugging aid).
+ static void dump(const bool isGeometric, const int inputSize,
+ const int *const inputXCoordinates, const int *const inputYCoordinates,
+ const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<int> *const sampledTimes,
+ const std::vector<float> *const sampledSpeedRates,
+ const std::vector<int> *const sampledBeelineSpeedPercentiles);
+ // Returns true when every sampled point still matches the raw input point at its recorded
+ // index (same x, y and, unless times is null, same time).
+ static bool checkAndReturnIsContinuousSuggestionPossible(const int inputSize,
+ const int *const xCoordinates, const int *const yCoordinates, const int *const times,
+ const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<int> *const sampledTimes,
+ const std::vector<int> *const sampledInputIndices);
+ // TODO: Move to most_probable_string_utils.h
+ // Greedily picks the lowest-valued entry of charProbabilities for each sampled point, writes
+ // the resulting zero-terminated code points into codePointBuf (cleared for MAX_WORD_LENGTH
+ // elements) and returns the sum of the picked values.
+ static float getMostProbableString(const ProximityInfo *const proximityInfo,
+ const int sampledInputSize,
+ const std::vector<std::unordered_map<int, float>> *const charProbabilities,
+ int *const codePointBuf);
+
+ private:
+ // Presumably prevents instantiation of this static-only class; the macro is defined
+ // elsewhere -- TODO confirm.
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfoStateUtils);
+
+ static float updateNearKeysDistances(const ProximityInfo *const proximityInfo,
+ const float maxPointToKeyLength, const int x, const int y,
+ const bool isGeometric,
+ NearKeysDistanceMap *const currentNearKeysDistances);
+ static bool isPrevLocalMin(const NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances);
+ static float getPointScore(const int mostCommonKeyWidth, const int x, const int y,
+ const int time, const bool lastPoint, const float nearest, const float sumAngle,
+ const NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances,
+ std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs);
+ static bool pushTouchPoint(const ProximityInfo *const proximityInfo,
+ const int maxPointToKeyLength, const int inputIndex, const int nodeCodePoint, int x,
+ int y, const int time, const bool isGeometric,
+ const bool doSampling, const bool isLastPoint,
+ const float sumAngle, NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances,
+ std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs,
+ std::vector<int> *sampledInputTimes, std::vector<int> *sampledLengthCache,
+ std::vector<int> *sampledInputIndice);
+ static float calculateBeelineSpeedRate(const int mostCommonKeyWidth, const float averageSpeed,
+ const int id, const int inputSize, const int *const xCoordinates,
+ const int *const yCoordinates, const int *times, const int sampledInputSize,
+ const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<int> *const inputIndice);
+ // Angle difference between the directions (index - 1 -> index) and (index -> index + 1);
+ // 0.0f for null vectors and for the first, last or an out-of-range index.
+ static float getPointAngle(const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const int index);
+ // Angle difference between the directions (index0 -> index1) and (index1 -> index2);
+ // 0.0f for null vectors or when any index is out of range.
+ static float getPointsAngle(const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const int index0, const int index1,
+ const int index2);
+ // Lowers entries of point index0 that point index1 holds with a larger value and shifts
+ // probability between the key and skip entries accordingly. Returns false when the two
+ // points are too far apart along the cached length for any suppression to be applied.
+ static bool suppressCharProbabilities(const int mostCommonKeyWidth,
+ const int sampledInputSize, const std::vector<int> *const lengthCache, const int index0,
+ const int index1, std::vector<std::unordered_map<int, float>> *charProbabilities);
+ static float calculateSquaredDistanceFromSweetSpotCenter(
+ const ProximityInfo *const proximityInfo, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const int keyIndex,
+ const int inputIndex);
+ static float calculateNormalizedSquaredDistance(const ProximityInfo *const proximityInfo,
+ const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const int keyIndex, const int inputIndex);
+};
+} // namespace latinime
+#endif // LATINIME_PROXIMITY_INFO_STATE_UTILS_H
diff --git a/third_party/android_prediction/suggest/core/layout/proximity_info_utils.h b/third_party/android_prediction/suggest/core/layout/proximity_info_utils.h
new file mode 100644
index 0000000..77df999
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/proximity_info_utils.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROXIMITY_INFO_UTILS_H
+#define LATINIME_PROXIMITY_INFO_UTILS_H
+
+#include <cmath>
+#include <unordered_map>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/layout/additional_proximity_chars.h"
+#include "third_party/android_prediction/suggest/core/layout/geometry_utils.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+namespace latinime {
+class ProximityInfoUtils {
+ public:
+ // Looks up the key index registered for code point c in codeToKeyMap.
+ // Returns NOT_AN_INDEX when the keyboard has no keys (so no coordinate data),
+ // when c is NOT_A_CODE_POINT, or when the lower-cased code point is not in the map.
+ static AK_FORCE_INLINE int getKeyIndexOf(const int keyCount, const int c,
+         const std::unordered_map<int, int> *const codeToKeyMap) {
+     const bool hasCoordinateData = keyCount != 0;
+     if (!hasCoordinateData || c == NOT_A_CODE_POINT) {
+         return NOT_AN_INDEX;
+     }
+     // Keys are looked up by the lower-case form of the code point.
+     const auto entry = codeToKeyMap->find(CharUtils::toLowerCase(c));
+     if (entry == codeToKeyMap->end()) {
+         return NOT_AN_INDEX;
+     }
+     return entry->second;
+ }
+
+ // Builds the proximity table for a whole input: for every input point i, the row of
+ // MAX_PROXIMITY_CHARS_SIZE ints starting at inputProximities[i * MAX_PROXIMITY_CHARS_SIZE]
+ // is filled by calculateProximities() from that point's code point and coordinates.
+ static AK_FORCE_INLINE void initializeProximities(const int *const inputCodes,
+         const int *const inputXCoordinates, const int *const inputYCoordinates,
+         const int inputSize, const int *const keyXCoordinates,
+         const int *const keyYCoordinates, const int *const keyWidths, const int *keyHeights,
+         const int *const proximityCharsArray, const int cellHeight, const int cellWidth,
+         const int gridWidth, const int mostCommonKeyWidth, const int keyCount,
+         const char *const localeStr,
+         const std::unordered_map<int, int> *const codeToKeyMap, int *inputProximities) {
+     // Original note: initialize mInputCodes and mNormalizedSquaredDistances. TODO: Merge
+     for (int inputIndex = 0; inputIndex < inputSize; ++inputIndex) {
+         int *const row = inputProximities + inputIndex * MAX_PROXIMITY_CHARS_SIZE;
+         calculateProximities(keyXCoordinates, keyYCoordinates, keyWidths, keyHeights,
+                 proximityCharsArray, cellHeight, cellWidth, gridWidth, mostCommonKeyWidth,
+                 keyCount, inputXCoordinates[inputIndex], inputYCoordinates[inputIndex],
+                 inputCodes[inputIndex], localeStr, codeToKeyMap, row);
+     }
+
+     if (DEBUG_PROXIMITY_CHARS) {
+         // Debug only: log every slot of every row.
+         for (int inputIndex = 0; inputIndex < inputSize; ++inputIndex) {
+             const int *const row = inputProximities + inputIndex * MAX_PROXIMITY_CHARS_SIZE;
+             AKLOGI("---");
+             for (int slot = 0; slot < MAX_PROXIMITY_CHARS_SIZE; ++slot) {
+                 int proximityChar = row[slot];
+                 // No-op; presumably keeps the variable referenced if AKLOGI expands to nothing.
+                 proximityChar += 0;
+                 AKLOGI("--- (%d)%c", inputIndex, proximityChar);
+             }
+         }
+     }
+ }
+
+ static AK_FORCE_INLINE int getStartIndexFromCoordinates(const int x, const int y,
+         const int cellHeight, const int cellWidth, const int gridWidth) {  // cell -> first slot
+     return MAX_PROXIMITY_CHARS_SIZE * ((x / cellWidth) + gridWidth * (y / cellHeight));
+ }
+
+ static inline float getSquaredDistanceFloat(const float x1, const float y1, const float x2,
+ const float y2) {  // SQUARE_FLOAT of the x gap plus SQUARE_FLOAT of the y gap
+ return GeometryUtils::SQUARE_FLOAT(x1 - x2) + GeometryUtils::SQUARE_FLOAT(y1 - y2);
+ }
+
+ // Returns the squared distance from the point (x, y) to the line that runs through
+ // (x1, y1) and (x2, y2).
+ //  - extend == false: the line is treated as a segment, so a projection that falls
+ //    outside it is replaced by the nearer end point.
+ //  - extend == true: the projection onto the infinite line is always used.
+ static inline float pointToLineSegSquaredDistanceFloat(const float x, const float y,
+         const float x1, const float y1, const float x2, const float y2, const bool extend) {
+     const float toPointX = x - x1;
+     const float toPointY = y - y1;
+     const float segmentX = x2 - x1;
+     const float segmentY = y2 - y1;
+     const float segmentLengthSqr = GeometryUtils::SQUARE_FLOAT(segmentX)
+             + GeometryUtils::SQUARE_FLOAT(segmentY);
+     if (segmentLengthSqr <= 0.0f) {
+         // Zero-length segment: use the point-to-point distance.
+         return getSquaredDistanceFloat(x, y, x1, y1);
+     }
+     // Projection position as a fraction of the segment: 0 at (x1, y1), 1 at (x2, y2).
+     const float dot = toPointX * segmentX + toPointY * segmentY;
+     const float fraction = dot / segmentLengthSqr;
+     if (!extend && fraction < 0.0f) {
+         return getSquaredDistanceFloat(x, y, x1, y1);
+     }
+     if (!extend && fraction > 1.0f) {
+         return getSquaredDistanceFloat(x, y, x2, y2);
+     }
+     const float projectedX = x1 + fraction * segmentX;
+     const float projectedY = y1 + fraction * segmentY;
+     return getSquaredDistanceFloat(x, y, projectedX, projectedY);
+ }
+
+ static AK_FORCE_INLINE bool isMatchOrProximityChar(const ProximityType type) {
+ return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR;
+ }  // i.e. true exactly for MATCH_CHAR, PROXIMITY_CHAR and ADDITIONAL_PROXIMITY_CHAR
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfoUtils);
+
+ static bool isOnKey(const int *const keyXCoordinates, const int *const keyYCoordinates,
+         const int *const keyWidths, const int *keyHeights, const int keyId, const int x,
+         const int y) {
+     if (keyId < 0) return true;  // NOT_A_ID is -1; every negative id counts as on the key
+     const int xMin = keyXCoordinates[keyId];
+     const int xEnd = xMin + keyWidths[keyId] + 1;  // exclusive bound, one past the width
+     const int yMin = keyYCoordinates[keyId];
+     const int yEnd = yMin + keyHeights[keyId];  // exclusive bound
+     return xMin < xEnd && yMin < yEnd && xMin <= x && x < xEnd && yMin <= y && y < yEnd;
+ }
+
+ // Writes the proximity list for one touch point into proximities: primaryKey, nearby
+ // characters of the touched grid cell, optionally a delimiter plus additional characters,
+ // then NOT_A_CODE_POINT in every unused slot. Returns early as soon as the list is full.
+ static AK_FORCE_INLINE void calculateProximities(const int *const keyXCoordinates,
+         const int *const keyYCoordinates, const int *const keyWidths, const int *keyHeights,
+         const int *const proximityCharsArray, const int cellHeight, const int cellWidth,
+         const int gridWidth, const int mostCommonKeyWidth, const int keyCount,
+         const int x, const int y, const int primaryKey, const char *const localeStr,
+         const std::unordered_map<int, int> *const codeToKeyMap, int *proximities) {
+     const int squaredDistanceLimit = mostCommonKeyWidth * mostCommonKeyWidth;
+     int used = 0;
+     proximities[used++] = primaryKey;
+     // Without a touch position the index stays negative and only the padding is written.
+     const bool hasTouchPosition = x != NOT_A_COORDINATE && y != NOT_A_COORDINATE;
+     const int startIndex = hasTouchPosition
+             ? getStartIndexFromCoordinates(x, y, cellHeight, cellWidth, gridWidth) : -1;
+     if (startIndex >= 0) {
+         // Step 1: characters of the touched grid cell whose key is hit or near enough.
+         for (int slot = 0; slot < MAX_PROXIMITY_CHARS_SIZE; ++slot) {
+             const int candidate = proximityCharsArray[startIndex + slot];
+             if (candidate < KEYCODE_SPACE || candidate == primaryKey) {
+                 continue;
+             }
+             const int keyIndex = getKeyIndexOf(keyCount, candidate, codeToKeyMap);
+             const bool onKey = isOnKey(keyXCoordinates, keyYCoordinates, keyWidths, keyHeights,
+                     keyIndex, x, y);
+             const int distance = squaredLengthToEdge(keyXCoordinates, keyYCoordinates,
+                     keyWidths, keyHeights, keyIndex, x, y);
+             if (!onKey && distance >= squaredDistanceLimit) {
+                 continue;
+             }
+             proximities[used++] = candidate;
+             if (used >= MAX_PROXIMITY_CHARS_SIZE) {
+                 if (DEBUG_DICT) {
+                     ASSERT(false);
+                 }
+                 return;
+             }
+         }
+         // Step 2: additional characters reported for (localeStr, primaryKey), if any.
+         const int additionalCount =
+                 AdditionalProximityChars::getAdditionalCharsSize(localeStr, primaryKey);
+         if (additionalCount > 0) {
+             proximities[used++] = ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE;
+             if (used >= MAX_PROXIMITY_CHARS_SIZE) {
+                 if (DEBUG_DICT) {
+                     ASSERT(false);
+                 }
+                 return;
+             }
+             const int *const additionalChars =
+                     AdditionalProximityChars::getAdditionalChars(localeStr, primaryKey);
+             for (int j = 0; j < additionalCount; ++j) {
+                 const int extra = additionalChars[j];
+                 bool alreadyListed = false;
+                 for (int k = 0; k < used && !alreadyListed; ++k) {
+                     alreadyListed = proximities[k] == extra;
+                 }
+                 if (alreadyListed) {
+                     continue;
+                 }
+                 proximities[used++] = extra;
+                 if (used >= MAX_PROXIMITY_CHARS_SIZE) {
+                     if (DEBUG_DICT) {
+                         ASSERT(false);
+                     }
+                     return;
+                 }
+             }
+         }
+     }
+     // Terminate and pad the list: every unused slot becomes NOT_A_CODE_POINT.
+     for (int i = used; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
+         proximities[i] = NOT_A_CODE_POINT;
+     }
+ }
+
+ static int squaredLengthToEdge(const int *const keyXCoordinates,
+         const int *const keyYCoordinates, const int *const keyWidths, const int *keyHeights,
+         const int keyId, const int x, const int y) {
+     // NOT_A_ID is -1, but every negative id gets the maximum value, just in case.
+     if (keyId < 0) return MAX_VALUE_FOR_WEIGHTING;
+     const int xMin = keyXCoordinates[keyId];
+     const int xMax = xMin + keyWidths[keyId];
+     const int yMin = keyYCoordinates[keyId];
+     const int yMax = yMin + keyHeights[keyId];
+     // Per-axis offset from the key rectangle to (x, y); zero when the coordinate lies
+     // between the two bounds (bounds included).
+     const int dx = x < xMin ? x - xMin : (x > xMax ? x - xMax : 0);
+     const int dy = y < yMin ? y - yMin : (y > yMax ? y - yMax : 0);
+     return dx * dx + dy * dy;
+ }
+};
+} // namespace latinime
+#endif // LATINIME_PROXIMITY_INFO_UTILS_H
diff --git a/third_party/android_prediction/suggest/core/layout/touch_position_correction_utils.h b/third_party/android_prediction/suggest/core/layout/touch_position_correction_utils.h
new file mode 100644
index 0000000..30eaf44
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/layout/touch_position_correction_utils.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TOUCH_POSITION_CORRECTION_UTILS_H
+#define LATINIME_TOUCH_POSITION_CORRECTION_UTILS_H
+
+#include <algorithm>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_params.h"
+
+namespace latinime {
+class TouchPositionCorrectionUtils {
+ public:
+ // Returns the factor that promotes or demotes a score according to the normalized squared
+ // distance of the touch from the sweet spot.
+ //  - Correction disabled: the distance itself, capped at kMaxFactor.
+ //  - Correction enabled: a piecewise linear curve that yields kStartFactor below kStart,
+ //    climbs to kSweetSpotFactor at kSweetSpot and on to kMaxFactor at kEnd, and stays at
+ //    kMaxFactor from kEnd onwards.
+ static float getSweetSpotFactor(const bool isTouchPositionCorrectionEnabled,
+         const float normalizedSquaredDistance) {
+     static const float kStartFactor = 0.0f;
+     static const float kSweetSpotFactor = 0.24f;
+     static const float kMaxFactor = 1.20f;
+     static const float kStart = 0.0f;
+     static const float kSweetSpot = 0.25f;
+     static const float kEnd = 1.0f;
+     const float distance = normalizedSquaredDistance;
+     if (!isTouchPositionCorrectionEnabled) {
+         return std::min(kMaxFactor, distance);
+     }
+
+     // Correction enabled: evaluate the curve segment by segment.
+     if (distance < kStart) {
+         return kStartFactor;
+     }
+     if (distance < kSweetSpot) {
+         return (kStartFactor * (kSweetSpot - distance)
+                 + kSweetSpotFactor * (distance - kStart)) / (kSweetSpot - kStart);
+     }
+     if (distance < kEnd) {
+         return (kSweetSpotFactor * (kEnd - distance)
+                 + kMaxFactor * (distance - kSweetSpot)) / (kEnd - kSweetSpot);
+     }
+     return kMaxFactor;
+ }
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TouchPositionCorrectionUtils);
+};
+} // namespace latinime
+#endif // LATINIME_TOUCH_POSITION_CORRECTION_UTILS_H
diff --git a/third_party/android_prediction/suggest/core/policy/dictionary_bigrams_structure_policy.h b/third_party/android_prediction/suggest/core/policy/dictionary_bigrams_structure_policy.h
new file mode 100644
index 0000000..25f4f6a
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/policy/dictionary_bigrams_structure_policy.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H
+#define LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+/*
+ * Abstract interface that hides how bigrams are structured; both operations are pure virtual.
+ */
+class DictionaryBigramsStructurePolicy {
+ public:
+ virtual ~DictionaryBigramsStructurePolicy() {}
+ // Outputs go through the out* pointers. NOTE(review): pos is presumably advanced - confirm.
+ virtual void getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const pos) const = 0;
+ virtual bool skipAllBigrams(int *const pos) const = 0;
+
+ protected:
+ DictionaryBigramsStructurePolicy() {}  // constructible only from subclasses
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictionaryBigramsStructurePolicy);
+};
+} // namespace latinime
+#endif /* LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H */
diff --git a/third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h b/third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h
new file mode 100644
index 0000000..50f4224
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H
+#define LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H
+
+#include <map>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+/*
+ * This class abstracts the structure of dictionary headers; every operation is pure virtual.
+ * Implement this policy to support additional dictionaries.
+ */
+class DictionaryHeaderStructurePolicy {
+ public:
+ typedef std::map<std::vector<int>, std::vector<int>> AttributeMap;  // int-vector keys/values
+
+ virtual ~DictionaryHeaderStructurePolicy() {}
+
+ virtual int getFormatVersionNumber() const = 0;
+
+ virtual int getSize() const = 0;
+
+ virtual const AttributeMap *getAttributeMap() const = 0;
+
+ virtual bool requiresGermanUmlautProcessing() const = 0;
+
+ virtual float getMultiWordCostMultiplier() const = 0;
+ // NOTE(review): presumably fills outValue (capacity outValueSize) for key - confirm.
+ virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
+ int outValueSize) const = 0;
+
+ virtual bool shouldBoostExactMatches() const = 0;
+
+ virtual const std::vector<int> *getLocale() const = 0;
+
+ virtual bool supportsBeginningOfSentence() const = 0;
+
+ protected:
+ DictionaryHeaderStructurePolicy() {}  // constructible only from subclasses
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictionaryHeaderStructurePolicy);
+};
+} // namespace latinime
+#endif /* LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H */
diff --git a/third_party/android_prediction/suggest/core/policy/dictionary_shortcuts_structure_policy.h b/third_party/android_prediction/suggest/core/policy/dictionary_shortcuts_structure_policy.h
new file mode 100644
index 0000000..8d5d676
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/policy/dictionary_shortcuts_structure_policy.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H
+#define LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+/*
+ * Abstract interface that hides how shortcuts are structured; all operations are pure virtual.
+ */
+class DictionaryShortcutsStructurePolicy {
+ public:
+ virtual ~DictionaryShortcutsStructurePolicy() {}
+
+ virtual int getStartPos(const int pos) const = 0;
+ // Outputs go through the out* pointers. NOTE(review): pos is presumably advanced - confirm.
+ virtual void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const = 0;
+
+ virtual void skipAllShortcuts(int *const pos) const = 0;
+
+ protected:
+ DictionaryShortcutsStructurePolicy() {}  // constructible only from subclasses
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictionaryShortcutsStructurePolicy);
+};
+} // namespace latinime
+#endif /* LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H */
diff --git a/third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h
new file mode 100644
index 0000000..01a40f9
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_H
+#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
+
+#include <memory>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/property/word_property.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+class DictionaryHeaderStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+class NgramListener;
+class PrevWordsInfo;
+class UnigramProperty;
+
+/*
+ * This class abstracts the structure of dictionaries; every operation is pure virtual.
+ * Implement this policy to support additional dictionaries.
+ */
+class DictionaryStructureWithBufferPolicy {
+ public:
+ typedef std::unique_ptr<DictionaryStructureWithBufferPolicy> StructurePolicyPtr;
+
+ virtual ~DictionaryStructureWithBufferPolicy() {}
+
+ virtual int getRootPosition() const = 0;
+
+ virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const = 0;
+
+ virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int nodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const = 0;
+
+ virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const = 0;
+
+ virtual int getProbability(const int unigramProbability,
+ const int bigramProbability) const = 0;
+
+ virtual int getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
+ const int nodePos) const = 0;
+
+ virtual void iterateNgramEntries(const int *const prevWordsPtNodePos,
+ NgramListener *const listener) const = 0;
+
+ virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
+
+ virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
+
+ virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0;
+
+ // Returns whether the update was successful.
+ virtual bool addUnigramEntry(const int *const word, const int length,
+ const UnigramProperty *const unigramProperty) = 0;
+
+ // Returns whether the update was successful.
+ virtual bool removeUnigramEntry(const int *const word, const int length) = 0;
+
+ // Returns whether the update was successful.
+ virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const BigramProperty *const bigramProperty) = 0;
+
+ // Returns whether the update was successful.
+ virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const int *const word, const int length) = 0;
+
+ // Returns whether the flush was successful.
+ virtual bool flush(const char *const filePath) = 0;
+
+ // Returns whether both the GC and the flush were successful.
+ virtual bool flushWithGC(const char *const filePath) = 0;
+
+ virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
+
+ // Currently, this method is used only for testing. You may want to consider creating a new
+ // dedicated method instead of this one if you want to use it in production.
+ virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
+ const int maxResultLength) = 0;
+
+ // Used for testing.
+ virtual const WordProperty getWordProperty(const int *const codePoints,
+ const int codePointCount) const = 0;
+
+ // Method to iterate over all words in the dictionary.
+ // The returned token has to be used to get the next word. If token is 0, this method
+ // starts a new iteration over the dictionary.
+ virtual int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) = 0;
+
+ virtual bool isCorrupted() const = 0;
+
+ protected:
+ DictionaryStructureWithBufferPolicy() {}  // constructible only from subclasses
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictionaryStructureWithBufferPolicy);
+};
+} // namespace latinime
+#endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */
diff --git a/third_party/android_prediction/suggest/core/policy/scoring.h b/third_party/android_prediction/suggest/core/policy/scoring.h
new file mode 100644
index 0000000..aaad200
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/policy/scoring.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SCORING_H
+#define LATINIME_SCORING_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/error_type_utils.h"
+
+namespace latinime {
+class DicNode;
+class DicTraverseSession;
+class SuggestionResults;
+
+// Pure-virtual policy: basically tweaks suggestions and distances apart from CompoundDistance.
+class Scoring {
+ public:
+ virtual int calculateFinalScore(const float compoundDistance, const int inputSize,
+ const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit,
+ const bool boostExactMatches) const = 0;
+ virtual void getMostProbableString(const DicTraverseSession *const traverseSession,
+ const float languageWeight, SuggestionResults *const outSuggestionResults) const = 0;
+ virtual float getAdjustedLanguageWeight(DicTraverseSession *const traverseSession,
+ DicNode *const terminals, const int size) const = 0;
+ virtual float getDoubleLetterDemotionDistanceCost(
+ const DicNode *const terminalDicNode) const = 0;
+ virtual bool autoCorrectsToMultiWordSuggestionIfTop() const = 0;
+ virtual bool sameAsTyped(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode) const = 0;
+
+ protected:
+ Scoring() {}
+ virtual ~Scoring() {}  // protected: instances cannot be deleted through a Scoring pointer
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Scoring);
+};
+} // namespace latinime
+#endif // LATINIME_SCORING_H
diff --git a/third_party/android_prediction/suggest/core/policy/suggest_policy.h b/third_party/android_prediction/suggest/core/policy/suggest_policy.h
new file mode 100644
index 0000000..098ddc1
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/policy/suggest_policy.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGEST_POLICY_H
+#define LATINIME_SUGGEST_POLICY_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class Traversal;
+class Scoring;
+class Weighting;
+
+class SuggestPolicy {  // Interface exposing one Traversal, one Scoring and one Weighting.
+ public:
+ SuggestPolicy() {}
+ virtual ~SuggestPolicy() {}
+ virtual const Traversal *getTraversal() const = 0;  // all three getters are pure virtual
+ virtual const Scoring *getScoring() const = 0;
+ virtual const Weighting *getWeighting() const = 0;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(SuggestPolicy);
+};
+} // namespace latinime
+#endif // LATINIME_SUGGEST_POLICY_H
diff --git a/third_party/android_prediction/suggest/core/policy/traversal.h b/third_party/android_prediction/suggest/core/policy/traversal.h
new file mode 100644
index 0000000..227091f
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/policy/traversal.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TRAVERSAL_H
+#define LATINIME_TRAVERSAL_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+class DicNode;
+class DicTraverseSession;
+
+class Traversal {  // Pure-virtual traversal policy; DicNode is only used through pointers.
+ public:
+ virtual int getMaxPointerCount() const = 0;
+ virtual bool allowsErrorCorrections(const DicNode *const dicNode) const = 0;
+ virtual bool isOmission(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode, const DicNode *const childDicNode,
+ const bool allowsErrorCorrections) const = 0;
+ virtual bool isSpaceSubstitutionTerminal(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode) const = 0;
+ virtual bool isSpaceOmissionTerminal(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode) const = 0;
+ virtual bool shouldDepthLevelCache(const DicTraverseSession *const traverseSession) const = 0;
+ virtual bool shouldNodeLevelCache(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode) const = 0;
+ virtual bool canDoLookAheadCorrection(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode) const = 0;
+ virtual ProximityType getProximityType(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode, const DicNode *const childDicNode) const = 0;
+ virtual bool needsToTraverseAllUserInput() const = 0;
+ virtual float getMaxSpatialDistance() const = 0;
+ virtual int getDefaultExpandDicNodeSize() const = 0;
+ virtual int getMaxCacheSize(const int inputSize) const = 0;
+ virtual int getTerminalCacheSize() const = 0;
+ virtual bool isPossibleOmissionChildNode(const DicTraverseSession *const traverseSession,
+ const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
+ virtual bool isGoodToTraverseNextWord(const DicNode *const dicNode) const = 0;
+
+ protected:
+ Traversal() {}
+ virtual ~Traversal() {}  // protected: instances cannot be deleted through a Traversal pointer
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Traversal);
+};
+} // namespace latinime
+#endif // LATINIME_TRAVERSAL_H
diff --git a/third_party/android_prediction/suggest/core/policy/weighting.cpp b/third_party/android_prediction/suggest/core/policy/weighting.cpp
new file mode 100644
index 0000000..1db2625
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/policy/weighting.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/policy/weighting.h"
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_profiler.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_utils.h"
+#include "third_party/android_prediction/suggest/core/dictionary/error_type_utils.h"
+#include "third_party/android_prediction/suggest/core/session/dic_traverse_session.h"
+
+namespace latinime {
+
+class MultiBigramMap;
+
+// Invokes the PROF_* macro that corresponds to |correctionType| on the profiler of |node|.
+// The whole body is compiled out unless DEBUG_DICT is enabled.
+static inline void profile(const CorrectionType correctionType, DicNode *const node) {
+#if DEBUG_DICT
+    switch (correctionType) {
+        case CT_OMISSION:
+            PROF_OMISSION(node->mProfiler);
+            break;
+        case CT_ADDITIONAL_PROXIMITY:
+            PROF_ADDITIONAL_PROXIMITY(node->mProfiler);
+            break;
+        case CT_SUBSTITUTION:
+            PROF_SUBSTITUTION(node->mProfiler);
+            break;
+        case CT_NEW_WORD_SPACE_OMISSION:
+            PROF_NEW_WORD(node->mProfiler);
+            break;
+        case CT_MATCH:
+            PROF_MATCH(node->mProfiler);
+            break;
+        case CT_COMPLETION:
+            PROF_COMPLETION(node->mProfiler);
+            break;
+        case CT_TERMINAL:
+            PROF_TERMINAL(node->mProfiler);
+            break;
+        case CT_TERMINAL_INSERTION:
+            PROF_TERMINAL_INSERTION(node->mProfiler);
+            break;
+        case CT_NEW_WORD_SPACE_SUBSTITUTION:
+            PROF_SPACE_SUBSTITUTION(node->mProfiler);
+            break;
+        case CT_INSERTION:
+            PROF_INSERTION(node->mProfiler);
+            break;
+        case CT_TRANSPOSITION:
+            PROF_TRANSPOSITION(node->mProfiler);
+            break;
+        default:
+            // Correction types without a PROF_* macro are ignored.
+            break;
+    }
+#endif
+}
+
+/* static */ void Weighting::addCostAndForwardInputIndex(const Weighting *const weighting,
+ const CorrectionType correctionType, const DicTraverseSession *const traverseSession,
+ const DicNode *const parentDicNode, DicNode *const dicNode,
+ MultiBigramMap *const multiBigramMap) {
+ const int inputSize = traverseSession->getInputSize();
+ DicNode_InputStateG inputStateG;
+ inputStateG.mNeedsToUpdateInputStateG = false; // Don't use input info by default
+ const float spatialCost = Weighting::getSpatialCost(weighting, correctionType,
+ traverseSession, parentDicNode, dicNode, &inputStateG);
+ const float languageCost = Weighting::getLanguageCost(weighting, correctionType,
+ traverseSession, parentDicNode, dicNode, multiBigramMap);
+ const ErrorTypeUtils::ErrorType errorType = weighting->getErrorType(correctionType,
+ traverseSession, parentDicNode, dicNode);
+ profile(correctionType, dicNode);
+ if (inputStateG.mNeedsToUpdateInputStateG) {
+ dicNode->updateInputIndexG(&inputStateG);
+ } else {
+ dicNode->forwardInputIndex(0, getForwardInputCount(correctionType),
+ (correctionType == CT_TRANSPOSITION));
+ }
+ dicNode->addCost(spatialCost, languageCost, weighting->needsToNormalizeCompoundDistance(),
+ inputSize, errorType);
+ if (CT_NEW_WORD_SPACE_OMISSION == correctionType) {
+ // When we are on a terminal, we save the current distance for evaluating
+ // when to auto-commit partial suggestions.
+ dicNode->saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet();
+ }
+}
+
+// Returns the spatial cost for |correctionType| by delegating to the matching Weighting hook.
+/* static */ float Weighting::getSpatialCost(const Weighting *const weighting,
+        const CorrectionType correctionType, const DicTraverseSession *const traverseSession,
+        const DicNode *const parentDicNode, const DicNode *const dicNode,
+        DicNode_InputStateG *const inputStateG) {
+    switch (correctionType) {
+        case CT_MATCH:
+            return weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
+        case CT_COMPLETION:
+            return weighting->getCompletionCost(traverseSession, dicNode);
+        case CT_OMISSION:
+            return weighting->getOmissionCost(parentDicNode, dicNode);
+        case CT_INSERTION:
+            return weighting->getInsertionCost(traverseSession, parentDicNode, dicNode);
+        case CT_TRANSPOSITION:
+            return weighting->getTranspositionCost(traverseSession, parentDicNode, dicNode);
+        case CT_ADDITIONAL_PROXIMITY: // only used for typing
+            return weighting->getAdditionalProximityCost();
+        case CT_SUBSTITUTION: // only used for typing
+            return weighting->getSubstitutionCost();
+        case CT_NEW_WORD_SPACE_OMISSION:
+            return weighting->getNewWordSpatialCost(traverseSession, dicNode, inputStateG);
+        case CT_NEW_WORD_SPACE_SUBSTITUTION:
+            return weighting->getSpaceSubstitutionCost(traverseSession, dicNode);
+        case CT_TERMINAL:
+            return weighting->getTerminalSpatialCost(traverseSession, dicNode);
+        case CT_TERMINAL_INSERTION:
+            return weighting->getTerminalInsertionCost(traverseSession, dicNode);
+        default:
+            break;
+    }
+    return 0.0f; // Any correction type that is not listed above.
+}
+
+// Returns the language cost for |correctionType|.
+// Only new-word transitions and terminals have one; every other correction type, including
+// any type that is not listed here, costs 0.
+/* static */ float Weighting::getLanguageCost(const Weighting *const weighting,
+        const CorrectionType correctionType, const DicTraverseSession *const traverseSession,
+        const DicNode *const parentDicNode, const DicNode *const dicNode,
+        MultiBigramMap *const multiBigramMap) {
+    switch (correctionType) {
+        // Both new-word corrections use the bigram language cost, which is computed from the
+        // parent node rather than from |dicNode|.
+        case CT_NEW_WORD_SPACE_OMISSION:
+        case CT_NEW_WORD_SPACE_SUBSTITUTION:
+            return weighting->getNewWordBigramLanguageCost(
+                    traverseSession, parentDicNode, multiBigramMap);
+
+        // A terminal is charged according to the bigram improbability of the node itself.
+        case CT_TERMINAL: {
+            const float improbability = DicNodeUtils::getBigramNodeImprobability(
+                    traverseSession->getDictionaryStructurePolicy(), dicNode, multiBigramMap);
+            return weighting->getTerminalLanguageCost(traverseSession, dicNode, improbability);
+        }
+
+        // No language component. CT_ADDITIONAL_PROXIMITY is not listed and also ends up in
+        // the default label.
+        case CT_OMISSION:
+        case CT_SUBSTITUTION:
+        case CT_MATCH:
+        case CT_COMPLETION:
+        case CT_TERMINAL_INSERTION:
+        case CT_INSERTION:
+        case CT_TRANSPOSITION:
+        default:
+            return 0.0f;
+    }
+}
+
+// Returns the count that addCostAndForwardInputIndex() hands to DicNode::forwardInputIndex()
+// for |correctionType|.
+/* static */ int Weighting::getForwardInputCount(const CorrectionType correctionType) {
+    switch (correctionType) {
+        // Corrections that forward by one.
+        case CT_MATCH:
+        case CT_COMPLETION:
+        case CT_TERMINAL_INSERTION:
+        case CT_NEW_WORD_SPACE_SUBSTITUTION:
+            return 1;
+
+        // Corrections that forward by two.
+        case CT_INSERTION:
+        case CT_TRANSPOSITION:
+            return 2; /* look ahead + skip the current char */
+
+        case CT_ADDITIONAL_PROXIMITY:
+        case CT_SUBSTITUTION:
+            return 0; /* 0 because CT_MATCH will be called */
+
+        // Corrections that do not forward, plus any type not listed above.
+        case CT_OMISSION:
+        case CT_NEW_WORD_SPACE_OMISSION:
+        case CT_TERMINAL:
+        default:
+            return 0;
+    }
+}
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/policy/weighting.h b/third_party/android_prediction/suggest/core/policy/weighting.h
new file mode 100644
index 0000000..9f266ea
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/policy/weighting.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WEIGHTING_H
+#define LATINIME_WEIGHTING_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/error_type_utils.h"
+
+namespace latinime {
+
+class DicNode;
+class DicTraverseSession;
+struct DicNode_InputStateG;
+class MultiBigramMap;
+// Cost-model interface: addCostAndForwardInputIndex() dispatches on CorrectionType to these hooks.
+class Weighting {
+ public:
+    static void addCostAndForwardInputIndex(const Weighting *const weighting,
+            const CorrectionType correctionType,
+            const DicTraverseSession *const traverseSession,
+            const DicNode *const parentDicNode, DicNode *const dicNode,
+            MultiBigramMap *const multiBigramMap);
+
+ protected:
+    virtual float getTerminalSpatialCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode) const = 0; // Spatial cost for CT_TERMINAL.
+    // Spatial cost for CT_OMISSION.
+    virtual float getOmissionCost(
+            const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
+    // Spatial cost for CT_MATCH; receives the input state owned by the dispatching caller.
+    virtual float getMatchedCost(
+            const DicTraverseSession *const traverseSession, const DicNode *const dicNode,
+            DicNode_InputStateG *inputStateG) const = 0;
+    // NOTE(review): not called from weighting.cpp; presumably used by subclasses or callers.
+    virtual bool isProximityDicNode(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode) const = 0;
+    // Spatial cost for CT_TRANSPOSITION.
+    virtual float getTranspositionCost(
+            const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode,
+            const DicNode *const dicNode) const = 0;
+    // Spatial cost for CT_INSERTION.
+    virtual float getInsertionCost(
+            const DicTraverseSession *const traverseSession,
+            const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
+    // Spatial cost for CT_NEW_WORD_SPACE_OMISSION.
+    virtual float getNewWordSpatialCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode, DicNode_InputStateG *const inputStateG) const = 0;
+    // Language cost for both CT_NEW_WORD_* types; weighting.cpp passes the parent node here.
+    virtual float getNewWordBigramLanguageCost(
+            const DicTraverseSession *const traverseSession, const DicNode *const dicNode,
+            MultiBigramMap *const multiBigramMap) const = 0;
+    // Spatial cost for CT_COMPLETION.
+    virtual float getCompletionCost(
+            const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode) const = 0;
+    // Spatial cost for CT_TERMINAL_INSERTION.
+    virtual float getTerminalInsertionCost(
+            const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode) const = 0;
+    // Language cost for CT_TERMINAL; the last argument is the node's bigram improbability.
+    virtual float getTerminalLanguageCost(
+            const DicTraverseSession *const traverseSession, const DicNode *const dicNode,
+            float dicNodeLanguageImprobability) const = 0;
+    // Forwarded to DicNode::addCost() by addCostAndForwardInputIndex().
+    virtual bool needsToNormalizeCompoundDistance() const = 0;
+    // Spatial cost for CT_ADDITIONAL_PROXIMITY.
+    virtual float getAdditionalProximityCost() const = 0;
+    // Spatial cost for CT_SUBSTITUTION.
+    virtual float getSubstitutionCost() const = 0;
+    // Spatial cost for CT_NEW_WORD_SPACE_SUBSTITUTION.
+    virtual float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode) const = 0;
+    // Error type that addCostAndForwardInputIndex() forwards to DicNode::addCost().
+    virtual ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
+            const DicTraverseSession *const traverseSession,
+            const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
+
+    Weighting() {}
+    virtual ~Weighting() {}
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Weighting);
+
+    static float getSpatialCost(const Weighting *const weighting,
+            const CorrectionType correctionType, const DicTraverseSession *const traverseSession,
+            const DicNode *const parentDicNode, const DicNode *const dicNode,
+            DicNode_InputStateG *const inputStateG);
+    static float getLanguageCost(const Weighting *const weighting,
+            const CorrectionType correctionType, const DicTraverseSession *const traverseSession,
+            const DicNode *const parentDicNode, const DicNode *const dicNode,
+            MultiBigramMap *const multiBigramMap);
+    // TODO: Move to TypingWeighting and GestureWeighting?
+    static int getForwardInputCount(const CorrectionType correctionType);
+};
+} // namespace latinime
+#endif // LATINIME_WEIGHTING_H
diff --git a/third_party/android_prediction/suggest/core/result/suggested_word.h b/third_party/android_prediction/suggest/core/result/suggested_word.h
new file mode 100644
index 0000000..1595624
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/result/suggested_word.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGESTED_WORD_H
+#define LATINIME_SUGGESTED_WORD_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/dictionary.h"
+
+namespace latinime {
+
+// Value object for one suggestion: its code points, score, kind/type flags, and the
+// partial-commit and auto-commit metadata passed to the constructor.
+class SuggestedWord {
+ public:
+    // Comparison functor: operator() returns true when |left| ranks better than |right|,
+    // i.e. it has a higher score, or the same score with fewer code points.
+    class Comparator {
+     public:
+        // const-qualified so that it can be invoked through a const comparator object.
+        bool operator()(const SuggestedWord &left, const SuggestedWord &right) const {
+            if (left.getScore() != right.getScore()) {
+                return left.getScore() > right.getScore();
+            }
+            return left.getCodePointCount() < right.getCodePointCount();
+        }
+
+     private:
+        DISALLOW_ASSIGNMENT_OPERATOR(Comparator);
+    };
+
+    // Copies |codePointCount| code points starting at |codePoints| into the word.
+    SuggestedWord(const int *const codePoints, const int codePointCount,
+            const int score, const int type, const int indexToPartialCommit,
+            const int autoCommitFirstWordConfidence)
+            : mCodePoints(codePoints, codePoints + codePointCount), mScore(score),
+              mType(type), mIndexToPartialCommit(indexToPartialCommit),
+              mAutoCommitFirstWordConfidence(autoCommitFirstWordConfidence) {}
+
+    // Returns the start of the code point buffer. data() is used instead of &at(0) so that a
+    // word with no code points does not trigger an out-of-range failure; the pointer must
+    // not be dereferenced when getCodePointCount() is 0.
+    const int *getCodePoint() const {
+        return mCodePoints.data();
+    }
+
+    // Number of code points, narrowed explicitly from the vector's size type.
+    int getCodePointCount() const {
+        return static_cast<int>(mCodePoints.size());
+    }
+
+    // Plain accessors for the values given to the constructor.
+    int getScore() const { return mScore; }
+    int getType() const { return mType; }
+    int getIndexToPartialCommit() const { return mIndexToPartialCommit; }
+    int getAutoCommitFirstWordConfidence() const { return mAutoCommitFirstWordConfidence; }
+
+ private:
+    DISALLOW_DEFAULT_CONSTRUCTOR(SuggestedWord);
+
+    std::vector<int> mCodePoints;
+    int mScore;
+    int mType;
+    int mIndexToPartialCommit;
+    int mAutoCommitFirstWordConfidence;
+};
+} // namespace latinime
+#endif /* LATINIME_SUGGESTED_WORD_H */
diff --git a/third_party/android_prediction/suggest/core/result/suggestion_results.cpp b/third_party/android_prediction/suggest/core/result/suggestion_results.cpp
new file mode 100644
index 0000000..4f66236
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/result/suggestion_results.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/result/suggestion_results.h"
+
+namespace latinime {
+
+// Adds a KIND_PREDICTION suggestion scored with |probability|. A word whose probability is
+// NOT_A_PROBABILITY is invalid and is silently ignored.
+void SuggestionResults::addPrediction(const int *const codePoints, const int codePointCount,
+        const int probability) {
+    if (probability != NOT_A_PROBABILITY) {
+        addSuggestion(codePoints, codePointCount, probability, Dictionary::KIND_PREDICTION,
+                NOT_AN_INDEX, NOT_A_FIRST_WORD_CONFIDENCE);
+    }
+}
+
+// Adds a suggestion while keeping at most mMaxSuggestionCount entries: when full, the new word
+// replaces the worst entry only if it has a higher score, or the same score and is shorter.
+void SuggestionResults::addSuggestion(const int *const codePoints, const int codePointCount,
+        const int score, const int type, const int indexToPartialCommit,
+        const int autoCommitFirstWordConfidence) {
+    if (codePointCount <= 0 || codePointCount > MAX_WORD_LENGTH) {
+        AKLOGE("Invalid word is added to the suggestion results. codePointCount: %d",
+                codePointCount);
+        return;
+    }
+    if (getSuggestionCount() >= mMaxSuggestionCount) {
+        if (mSuggestedWords.empty()) return; // Capacity <= 0: top() on an empty queue is UB.
+        const SuggestedWord &worst = mSuggestedWords.top();
+        const bool isBetter = score > worst.getScore() || (score == worst.getScore()
+                && codePointCount < worst.getCodePointCount());
+        if (!isBetter) return;
+        mSuggestedWords.pop();
+    }
+    mSuggestedWords.push(SuggestedWord(codePoints, codePointCount, score, type,
+            indexToPartialCommit, autoCommitFirstWordConfidence));
+}
+
+// Copies the scores into |outScores|, filling from the last slot backwards in pop order.
+void SuggestionResults::getSortedScores(int *const outScores) const {
+    auto pending = mSuggestedWords;
+    for (auto slot = pending.size(); slot > 0; --slot) {
+        outScores[slot - 1] = pending.top().getScore();
+        pending.pop();
+    }
+}
+
+// Logs the language weight, then passes every suggestion to DUMP_SUGGESTION with a running
+// index, visiting them in the reverse of the queue's pop order.
+void SuggestionResults::dumpSuggestions() const {
+    AKLOGE("language weight: %f", mLanguageWeight);
+    auto pending = mSuggestedWords;
+    std::vector<SuggestedWord> popped;
+    for (; !pending.empty(); pending.pop()) {
+        popped.push_back(pending.top());
+    }
+    int index = 0;
+    for (auto it = popped.rbegin(); it != popped.rend(); ++it, ++index) {
+        DUMP_SUGGESTION(it->getCodePoint(), it->getCodePointCount(), index, it->getScore());
+    }
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/result/suggestion_results.h b/third_party/android_prediction/suggest/core/result/suggestion_results.h
new file mode 100644
index 0000000..e07de8b
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/result/suggestion_results.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGESTION_RESULTS_H
+#define LATINIME_SUGGESTION_RESULTS_H
+
+#include <queue>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/result/suggested_word.h"
+
+namespace latinime {
+
+// Collection of suggestions capped at the count given to the constructor. The queue uses
+// SuggestedWord::Comparator for ordering.
+class SuggestionResults {
+ public:
+    explicit SuggestionResults(const int maxSuggestionCount)
+            : mSuggestedWords(), mMaxSuggestionCount(maxSuggestionCount),
+              mLanguageWeight(NOT_A_LANGUAGE_WEIGHT) {}
+
+    void addPrediction(const int *const codePoints, const int codePointCount,
+            const int probability);
+    void addSuggestion(const int *const codePoints, const int codePointCount,
+            const int score, const int type, const int indexToPartialCommit,
+            const int autoCommitFirstWordConfidence);
+    void getSortedScores(int *const outScores) const;
+    void dumpSuggestions() const;
+
+    void setLanguageWeight(const float languageWeight) { mLanguageWeight = languageWeight; }
+    // Number of stored suggestions, narrowed explicitly from the container's size type.
+    int getSuggestionCount() const { return static_cast<int>(mSuggestedWords.size()); }
+
+    // NOTE(review): exposed as a public member; presumably read directly by callers.
+    std::priority_queue<
+            SuggestedWord, std::vector<SuggestedWord>, SuggestedWord::Comparator> mSuggestedWords;
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(SuggestionResults);
+
+    const int mMaxSuggestionCount;
+    float mLanguageWeight;
+};
+} // namespace latinime
+#endif // LATINIME_SUGGESTION_RESULTS_H
diff --git a/third_party/android_prediction/suggest/core/result/suggestions_output_utils.cpp b/third_party/android_prediction/suggest/core/result/suggestions_output_utils.cpp
new file mode 100644
index 0000000..727bcbf
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/result/suggestions_output_utils.cpp
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/result/suggestions_output_utils.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_utils.h"
+#include "third_party/android_prediction/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
+#include "third_party/android_prediction/suggest/core/dictionary/error_type_utils.h"
+#include "third_party/android_prediction/suggest/core/policy/scoring.h"
+#include "third_party/android_prediction/suggest/core/result/suggestion_results.h"
+#include "third_party/android_prediction/suggest/core/session/dic_traverse_session.h"
+#include "third_party/android_prediction/suggest/core/suggest_options.h"
+
+namespace latinime {
+
+const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; // input size >= this
+// Pops all cached terminals, resolves the language weight, and outputs each terminal's results.
+/* static */ void SuggestionsOutputUtils::outputSuggestions(
+        const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
+        const float languageWeight, SuggestionResults *const outSuggestionResults) {
+#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
+    const int terminalSize = 0; // No terminals are output in this debug configuration.
+#else
+    const int terminalSize = traverseSession->getDicTraverseCache()->terminalSize();
+#endif
+    std::vector<DicNode> terminals(terminalSize);
+    for (int index = terminalSize - 1; index >= 0; --index) { // Fill back-to-front in pop order.
+        traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
+    }
+    // Compute a language weight when an invalid language weight is passed.
+    // Any negative value, such as NOT_A_LANGUAGE_WEIGHT (-1), is treated as invalid.
+    const float languageWeightToOutputSuggestions = (languageWeight < 0.0f) ?
+            scoringPolicy->getAdjustedLanguageWeight(
+                    traverseSession, terminals.data(), terminalSize) : languageWeight;
+    outSuggestionResults->setLanguageWeight(languageWeightToOutputSuggestions);
+    // Force autocorrection for obvious long multi-word suggestions when the top suggestion
+    // (terminals.front()) is a long multiple words suggestion.
+    // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
+    const bool forceCommitMultiWords = scoringPolicy->autoCorrectsToMultiWordSuggestionIfTop()
+            && (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
+                    && !terminals.empty() && terminals.front().hasMultipleWords());
+    // TODO: have partial commit work even with multiple pointers.
+    const bool outputSecondWordFirstLetterInputIndex =
+            traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
+    const bool boostExactMatches = traverseSession->getDictionaryStructurePolicy()->
+            getHeaderStructurePolicy()->shouldBoostExactMatches();
+
+    // Output the suggestions (and shortcuts) of every terminal, in vector order.
+    for (auto &terminalDicNode : terminals) {
+        outputSuggestionsOfDicNode(scoringPolicy, traverseSession, &terminalDicNode,
+                languageWeightToOutputSuggestions, boostExactMatches, forceCommitMultiWords,
+                outputSecondWordFirstLetterInputIndex, outSuggestionResults);
+    }
+    scoringPolicy->getMostProbableString(traverseSession, languageWeightToOutputSuggestions,
+            outSuggestionResults);
+}
+// Scores one terminal node and adds it, plus its shortcut targets, to |outSuggestionResults|.
+/* static */ void SuggestionsOutputUtils::outputSuggestionsOfDicNode(
+        const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
+        const DicNode *const terminalDicNode, const float languageWeight,
+        const bool boostExactMatches, const bool forceCommitMultiWords,
+        const bool outputSecondWordFirstLetterInputIndex,
+        SuggestionResults *const outSuggestionResults) {
+    if (DEBUG_GEO_FULL) {
+        terminalDicNode->dump("OUT:");
+    }
+    const float doubleLetterCost =
+            scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
+    const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
+            + doubleLetterCost;
+    const bool isPossiblyOffensiveWord = // A probability <= 0 marks the word as such.
+            traverseSession->getDictionaryStructurePolicy()->getProbability(
+                    terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
+    const bool isExactMatch =
+            ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
+    const bool isExactMatchWithIntentionalOmission =
+            ErrorTypeUtils::isExactMatchWithIntentionalOmission(
+                    terminalDicNode->getContainedErrorTypes());
+    const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
+    // Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
+    // (e.g. "AMD" and "and")
+    const bool isSafeExactMatch = isExactMatch
+            && !(isPossiblyOffensiveWord && isFirstCharUppercase);
+    const int outputTypeFlags = // OR-ed into the suggestion kind below.
+            (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
+            | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
+            | (isExactMatchWithIntentionalOmission ?
+                    Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);
+
+    // Entries that are blacklisted or do not represent a word should not be output.
+    const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
+    // When we have to block offensive words, non-exact matched offensive words should not be
+    // output.
+    const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords();
+    const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord
+            && !isSafeExactMatch;
+
+    // Increase output score of top typing suggestion to ensure autocorrection.
+    // TODO: Better integration with java side autocorrection logic.
+    const int finalScore = scoringPolicy->calculateFinalScore(
+            compoundDistance, traverseSession->getInputSize(),
+            terminalDicNode->getContainedErrorTypes(),
+            (forceCommitMultiWords && terminalDicNode->hasMultipleWords()),
+            boostExactMatches);
+
+    // Don't output invalid or blocked offensive words. However, we still need to submit their
+    // shortcuts if any.
+    if (isValidWord && !isBlockedOffensiveWord) {
+        int codePoints[MAX_WORD_LENGTH];
+        terminalDicNode->outputResult(codePoints);
+        const int indexToPartialCommit = outputSecondWordFirstLetterInputIndex ?
+                terminalDicNode->getSecondWordFirstInputIndex(
+                        traverseSession->getProximityInfoState(0)) :
+                NOT_AN_INDEX;
+        outSuggestionResults->addSuggestion(codePoints,
+                terminalDicNode->getTotalNodeCodePointCount(),
+                finalScore, Dictionary::KIND_CORRECTION | outputTypeFlags,
+                indexToPartialCommit, computeFirstWordConfidence(terminalDicNode));
+    }
+
+    // Output shortcuts.
+    // Shortcut is not supported for multiple words suggestions.
+    // TODO: Check shortcuts during traversal for multiple words suggestions.
+    if (!terminalDicNode->hasMultipleWords()) {
+        BinaryDictionaryShortcutIterator shortcutIt(
+                traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
+                traverseSession->getDictionaryStructurePolicy()
+                        ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
+        const bool sameAsTyped = scoringPolicy->sameAsTyped(traverseSession, terminalDicNode);
+        outputShortcuts(&shortcutIt, finalScore, sameAsTyped, outSuggestionResults);
+    }
+}
+
+// Scores how confident we are that the first word of a multi-word suggestion can be
+// auto-committed. Useful values range from 0 to 1,000,000, the latter being the auto-commit
+// cutoff; values outside that range are fine and simply mean "very bad" or "very confident".
+// Returns NOT_A_FIRST_WORD_CONFIDENCE when the suggestion contains no space.
+/* static */ int SuggestionsOutputUtils::computeFirstWordConfidence(
+        const DicNode *const terminalDicNode) {
+    // Ranges in which most inputs are expected to fall. They are not strict bounds; they only
+    // scale the contribution of each factor.
+    // Expected space count is 1 ~ 5
+    static const int MIN_EXPECTED_SPACE_COUNT = 1;
+    static const int MAX_EXPECTED_SPACE_COUNT = 5;
+    // Expected length is about 4 ~ 30
+    static const int MIN_EXPECTED_LENGTH = 4;
+    static const int MAX_EXPECTED_LENGTH = 30;
+    // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
+    static const float MIN_EXPECTED_DISTANCE = 0.0;
+    static const float MAX_EXPECTED_DISTANCE = 2.0;
+
+    const int numSpaces = terminalDicNode->getTotalNodeSpaceCount();
+    const int numCodePoints = terminalDicNode->getTotalNodeCodePointCount();
+    const float firstWordDistance =
+            terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();
+
+    // We need at least a space.
+    if (numSpaces < 1) {
+        return NOT_A_FIRST_WORD_CONFIDENCE;
+    }
+
+    // The smaller the distance, the higher the contribution: MIN_EXPECTED_DISTANCE yields the
+    // full distance weight and MAX_EXPECTED_DISTANCE yields 0. Clamp to avoid overflows.
+    float clampedDistance = firstWordDistance;
+    if (firstWordDistance < MIN_EXPECTED_DISTANCE) {
+        clampedDistance = MIN_EXPECTED_DISTANCE;
+    } else if (firstWordDistance > MAX_EXPECTED_DISTANCE) {
+        clampedDistance = MAX_EXPECTED_DISTANCE;
+    }
+    const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
+            * (MAX_EXPECTED_DISTANCE - clampedDistance)
+            / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
+    // The longer the suggestion, the larger the contribution: MIN_EXPECTED_LENGTH yields 0 and
+    // MAX_EXPECTED_LENGTH the full length weight. Length is guaranteed to be between 1 and 48,
+    // so it is not clamped.
+    const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
+            * (numCodePoints - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
+    // The more spaces, the larger the contribution: MIN_EXPECTED_SPACE_COUNT yields 0 and
+    // MAX_EXPECTED_SPACE_COUNT the full space count weight.
+    const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
+            * (numSpaces - MIN_EXPECTED_SPACE_COUNT)
+            / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);
+    return distanceContribution + lengthContribution + spaceContribution;
+}
+
+// Outputs every shortcut target of |shortcutIt|: KIND_WHITELIST with S_INT_MAX for whitelisted
+// targets of a same-as-typed word, otherwise KIND_SHORTCUT scored one below |finalScore|.
+/* static */ void SuggestionsOutputUtils::outputShortcuts(
+        BinaryDictionaryShortcutIterator *const shortcutIt, const int finalScore,
+        const bool sameAsTyped, SuggestionResults *const outSuggestionResults) {
+    int shortcutTarget[MAX_WORD_LENGTH];
+    while (shortcutIt->hasNextShortcutTarget()) {
+        bool isWhitelist = false;
+        int shortcutTargetStringLength = 0;
+        shortcutIt->nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
+                &shortcutTargetStringLength, &isWhitelist);
+        int shortcutScore;
+        int kind;
+        if (isWhitelist && sameAsTyped) {
+            shortcutScore = S_INT_MAX;
+            kind = Dictionary::KIND_WHITELIST;
+        } else {
+            // shortcut entry's score == its base entry's score - 1, with protection against
+            // int underflow. This is applied once; the score is then passed on unchanged.
+            shortcutScore = std::max(S_INT_MIN + 1, finalScore) - 1;
+            kind = Dictionary::KIND_SHORTCUT;
+        }
+        outSuggestionResults->addSuggestion(shortcutTarget, shortcutTargetStringLength,
+                shortcutScore, kind, NOT_AN_INDEX, NOT_A_FIRST_WORD_CONFIDENCE);
+    }
+}
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/result/suggestions_output_utils.h b/third_party/android_prediction/suggest/core/result/suggestions_output_utils.h
new file mode 100644
index 0000000..bcc9c45
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/result/suggestions_output_utils.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGESTIONS_OUTPUT_UTILS
+#define LATINIME_SUGGESTIONS_OUTPUT_UTILS
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class BinaryDictionaryShortcutIterator;
+class DicNode;
+class DicTraverseSession;
+class Scoring;
+class SuggestionResults;
+
+class SuggestionsOutputUtils {
+ public:
+ /**
+ * Outputs the final list of suggestions (i.e., terminal nodes).
+ */
+ static void outputSuggestions(const Scoring *const scoringPolicy,
+ DicTraverseSession *traverseSession, const float languageWeight,
+ SuggestionResults *const outSuggestionResults);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SuggestionsOutputUtils);
+
+ // Inputs longer than this will autocorrect if the suggestion is multi-word
+ static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT;
+
+ static void outputSuggestionsOfDicNode(const Scoring *const scoringPolicy,
+ DicTraverseSession *traverseSession, const DicNode *const terminalDicNode,
+ const float languageWeight, const bool boostExactMatches,
+ const bool forceCommitMultiWords, const bool outputSecondWordFirstLetterInputIndex,
+ SuggestionResults *const outSuggestionResults);
+ static void outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
+ const int finalScore, const bool sameAsTyped,
+ SuggestionResults *const outSuggestionResults);
+ static int computeFirstWordConfidence(const DicNode *const terminalDicNode);
+};
+} // namespace latinime
+#endif // LATINIME_SUGGESTIONS_OUTPUT_UTILS
diff --git a/third_party/android_prediction/suggest/core/session/dic_traverse_session.cpp b/third_party/android_prediction/suggest/core/session/dic_traverse_session.cpp
new file mode 100644
index 0000000..d1a61f6
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/session/dic_traverse_session.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/session/dic_traverse_session.h"
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/dictionary.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "third_party/android_prediction/suggest/core/session/prev_words_info.h"
+
+namespace latinime {
+
+// 256K bytes threshold is heuristically used to distinguish dictionaries containing many unigrams
+// (e.g. main dictionary) from small dictionaries (e.g. contacts...)
+const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION =
+ 256 * 1024;
+
+void DicTraverseSession::init(const Dictionary *const dictionary,
+ const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions) {
+ mDictionary = dictionary; // Assigned first: getDictionaryStructurePolicy() below reads it.
+ mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
+ ->getMultiWordCostMultiplier();
+ mSuggestOptions = suggestOptions;
+ prevWordsInfo->getPrevWordsTerminalPtNodePos( // NOTE: prevWordsInfo is dereferenced unchecked.
+ getDictionaryStructurePolicy(), mPrevWordsPtNodePos, true /* tryLowerCaseSearch */);
+}
+
+void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
+ const int *inputCodePoints, const int inputSize, const int *const inputXs,
+ const int *const inputYs, const int *const times, const int *const pointerIds,
+ const float maxSpatialDistance, const int maxPointerCount) {
+ mProximityInfo = pInfo;
+ mMaxPointerCount = maxPointerCount;
+ initializeProximityInfoStates(inputCodePoints, inputXs, inputYs, times, pointerIds, inputSize,
+ maxSpatialDistance, maxPointerCount);
+}
+
+const DictionaryStructureWithBufferPolicy *DicTraverseSession::getDictionaryStructurePolicy()
+ const {
+ return mDictionary->getDictionaryStructurePolicy();
+}
+
+void DicTraverseSession::resetCache(const int thresholdForNextActiveDicNodes, const int maxWords) {
+ mDicNodesCache.reset(thresholdForNextActiveDicNodes /* nextActiveSize */,
+ maxWords /* terminalSize */);
+ mMultiBigramMap.clear();
+}
+
+void DicTraverseSession::initializeProximityInfoStates(const int *const inputCodePoints,
+ const int *const inputXs, const int *const inputYs, const int *const times,
+ const int *const pointerIds, const int inputSize, const float maxSpatialDistance,
+ const int maxPointerCount) {
+ ASSERT(1 <= maxPointerCount && maxPointerCount <= MAX_POINTER_COUNT_G); // Bounds the index below.
+ mInputSize = 0; // Rebuilt below as the sum of size() over the initialized pointer states.
+ for (int i = 0; i < maxPointerCount; ++i) {
+ mProximityInfoStates[i].initInputParams(i, maxSpatialDistance, getProximityInfo(),
+ inputCodePoints, inputSize, inputXs, inputYs, times, pointerIds,
+ maxPointerCount == MAX_POINTER_COUNT_G
+ /* TODO: this is a hack. fix proximity info state */);
+ mInputSize += mProximityInfoStates[i].size();
+ }
+}
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/session/dic_traverse_session.h b/third_party/android_prediction/suggest/core/session/dic_traverse_session.h
new file mode 100644
index 0000000..d74846e
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/session/dic_traverse_session.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_TRAVERSE_SESSION_H
+#define LATINIME_DIC_TRAVERSE_SESSION_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_nodes_cache.h"
+#include "third_party/android_prediction/suggest/core/dictionary/multi_bigram_map.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_state.h"
+
+namespace latinime {
+
+class Dictionary;
+class DictionaryStructureWithBufferPolicy;
+class PrevWordsInfo;
+class ProximityInfo;
+class SuggestOptions;
+
+class DicTraverseSession {
+ public:
+
+ // A factory method for DicTraverseSession
+ static AK_FORCE_INLINE void *getSessionInstance(std::string localeStr,
+ long dictSize) {
+ // To deal with the trade-off between accuracy and memory space, a large cache is used for
+ // dictionaries at least as large as the threshold.
+ return new DicTraverseSession(localeStr,
+ dictSize >= DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION);
+ }
+
+ static AK_FORCE_INLINE void releaseSessionInstance(DicTraverseSession *traverseSession) {
+ delete traverseSession;
+ }
+
+ AK_FORCE_INLINE DicTraverseSession(std::string localeStr, bool usesLargeCache)
+ : mProximityInfo(nullptr), mDictionary(nullptr), mSuggestOptions(nullptr),
+ mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mMaxPointerCount(1),
+ mMultiWordCostMultiplier(1.0f) {
+ // NOTE: mProximityInfoStates is an array of instances.
+ // No need to initialize it explicitly here.
+ for (size_t i = 0; i < NELEMS(mPrevWordsPtNodePos); ++i) {
+ mPrevWordsPtNodePos[i] = NOT_A_DICT_POS;
+ }
+ }
+
+ // Non virtual inline destructor -- never inherit this class
+ AK_FORCE_INLINE ~DicTraverseSession() {}
+
+ void init(const Dictionary *dictionary, const PrevWordsInfo *const prevWordsInfo,
+ const SuggestOptions *const suggestOptions);
+ // TODO: Remove and merge into init
+ void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints,
+ const int inputSize, const int *const inputXs, const int *const inputYs,
+ const int *const times, const int *const pointerIds, const float maxSpatialDistance,
+ const int maxPointerCount);
+ void resetCache(const int thresholdForNextActiveDicNodes, const int maxWords);
+
+ const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const;
+
+ //--------------------
+ // getters and setters
+ //--------------------
+ const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
+ const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
+ const int *getPrevWordsPtNodePos() const { return mPrevWordsPtNodePos; }
+ DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
+ MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
+ const ProximityInfoState *getProximityInfoState(int id) const {
+ return &mProximityInfoStates[id];
+ }
+ int getInputSize() const { return mInputSize; }
+
+ bool isOnlyOnePointerUsed(int *pointerId) const {
+ // Not in the dictionary word
+ int usedPointerCount = 0;
+ int usedPointerId = 0;
+ for (int i = 0; i < mMaxPointerCount; ++i) {
+ if (mProximityInfoStates[i].isUsed()) {
+ ++usedPointerCount;
+ usedPointerId = i;
+ }
+ }
+ if (usedPointerCount != 1) {
+ return false;
+ }
+ if (pointerId) {
+ *pointerId = usedPointerId;
+ }
+ return true;
+ }
+
+ ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const {
+ ProximityType proximityType = UNRELATED_CHAR;
+ for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
+ if (!mProximityInfoStates[i].isUsed()) {
+ continue;
+ }
+ const int pointerId = dicNode->getInputIndex(i);
+ proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint);
+ ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR);
+ // TODO: Make this more generic
+ // Currently we assume there are only two types here -- UNRELATED_CHAR
+ // and MATCH_CHAR
+ if (proximityType != UNRELATED_CHAR) {
+ return proximityType;
+ }
+ }
+ return proximityType;
+ }
+
+ AK_FORCE_INLINE bool isCacheBorderForTyping(const int inputSize) const {
+ return mDicNodesCache.isCacheBorderForTyping(inputSize);
+ }
+
+ /**
+ * Returns whether or not it is possible to continue suggestion from the previous search.
+ */
+ // TODO: Remove. No need to check once the session is fully implemented.
+ bool isContinuousSuggestionPossible() const {
+ if (!mDicNodesCache.hasCachedDicNodesForContinuousSuggestion()) {
+ return false;
+ }
+ ASSERT(mMaxPointerCount <= MAX_POINTER_COUNT_G);
+ for (int i = 0; i < mMaxPointerCount; ++i) {
+ const ProximityInfoState *const pInfoState = getProximityInfoState(i);
+ // If a proximity info state is not continuous suggestion possible,
+ // do not continue searching.
+ if (pInfoState->isUsed() && !pInfoState->isContinuousSuggestionPossible()) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ bool isTouchPositionCorrectionEnabled() const {
+ return mProximityInfoStates[0].touchPositionCorrectionEnabled();
+ }
+
+ float getMultiWordCostMultiplier() const {
+ return mMultiWordCostMultiplier;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DicTraverseSession);
+ // threshold to start caching
+ static const int CACHE_START_INPUT_LENGTH_THRESHOLD;
+ static const int DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION;
+ void initializeProximityInfoStates(const int *const inputCodePoints, const int *const inputXs,
+ const int *const inputYs, const int *const times, const int *const pointerIds,
+ const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
+
+ int mPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ const ProximityInfo *mProximityInfo;
+ const Dictionary *mDictionary;
+ const SuggestOptions *mSuggestOptions;
+
+ DicNodesCache mDicNodesCache;
+ // Temporary cache for bigram frequencies
+ MultiBigramMap mMultiBigramMap;
+ ProximityInfoState mProximityInfoStates[MAX_POINTER_COUNT_G];
+
+ int mInputSize;
+ int mMaxPointerCount;
+
+ /////////////////////////////////
+ // Configuration per dictionary
+ float mMultiWordCostMultiplier;
+
+};
+} // namespace latinime
+#endif // LATINIME_DIC_TRAVERSE_SESSION_H
diff --git a/third_party/android_prediction/suggest/core/session/prev_words_info.h b/third_party/android_prediction/suggest/core/session/prev_words_info.h
new file mode 100644
index 0000000..fddac8c
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/session/prev_words_info.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PREV_WORDS_INFO_H
+#define LATINIME_PREV_WORDS_INFO_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+namespace latinime {
+
+// TODO: Support n-gram.
+class PrevWordsInfo {
+ public:
+ // No prev word information.
+ PrevWordsInfo() {
+ clear();
+ }
+
+ PrevWordsInfo(PrevWordsInfo &&prevWordsInfo) {
+ for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
+ mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];
+ memmove(mPrevWordCodePoints[i], prevWordsInfo.mPrevWordCodePoints[i],
+ sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
+ mIsBeginningOfSentence[i] = prevWordsInfo.mIsBeginningOfSentence[i];
+ }
+ }
+
+ // Construct from previous words.
+ PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH],
+ const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
+ const size_t prevWordCount) {
+ clear();
+ for (size_t i = 0; i < std::min(NELEMS(mPrevWordCodePoints), prevWordCount); ++i) {
+ if (prevWordCodePointCount[i] < 0 || prevWordCodePointCount[i] > MAX_WORD_LENGTH) {
+ continue;
+ }
+ memmove(mPrevWordCodePoints[i], prevWordCodePoints[i],
+ sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]);
+ mPrevWordCodePointCount[i] = prevWordCodePointCount[i];
+ mIsBeginningOfSentence[i] = isBeginningOfSentence[i];
+ }
+ }
+
+    // Construct from a previous word; stays cleared on a null or out-of-range input.
+    PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
+            const bool isBeginningOfSentence) {
+        clear();
+        if (prevWordCodePoints && prevWordCodePointCount >= 0  // < 0 would wrap in memmove()
+                && prevWordCodePointCount <= MAX_WORD_LENGTH) {
+            memmove(mPrevWordCodePoints[0], prevWordCodePoints,
+                    sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount);
+            mPrevWordCodePointCount[0] = prevWordCodePointCount;
+            mIsBeginningOfSentence[0] = isBeginningOfSentence;
+        }
+    }
+
+ bool isValid() const {
+ if (mPrevWordCodePointCount[0] > 0) {
+ return true;
+ }
+ if (mIsBeginningOfSentence[0]) {
+ return true;
+ }
+ return false;
+ }
+
+ void getPrevWordsTerminalPtNodePos(
+ const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+ int *const outPrevWordsTerminalPtNodePos, const bool tryLowerCaseSearch) const {
+ for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
+ outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy,
+ mPrevWordCodePoints[i], mPrevWordCodePointCount[i],
+ mIsBeginningOfSentence[i], tryLowerCaseSearch);
+ }
+ }
+
+ // n is 1-indexed.
+ const int *getNthPrevWordCodePoints(const int n) const {
+ if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
+ return nullptr;
+ }
+ return mPrevWordCodePoints[n - 1];
+ }
+
+ // n is 1-indexed.
+ int getNthPrevWordCodePointCount(const int n) const {
+ if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
+ return 0;
+ }
+ return mPrevWordCodePointCount[n - 1];
+ }
+
+ // n is 1-indexed.
+ bool isNthPrevWordBeginningOfSentence(const int n) const {
+ if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
+ return false;
+ }
+ return mIsBeginningOfSentence[n - 1];
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
+
+    static int getTerminalPtNodePosOfWord(  // Looks up the terminal PtNode position of a word.
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+            const int *const wordCodePoints, const int wordCodePointCount,
+            const bool isBeginningOfSentence, const bool tryLowerCaseSearch) {
+        if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount < 0
+                || wordCodePointCount > MAX_WORD_LENGTH) {  // Keeps memmove() within codePoints.
+            return NOT_A_DICT_POS;
+        }
+        int codePoints[MAX_WORD_LENGTH];
+        int codePointCount = wordCodePointCount;
+        memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+        if (isBeginningOfSentence) {
+            codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
+                    codePointCount, MAX_WORD_LENGTH);
+            if (codePointCount <= 0) {
+                return NOT_A_DICT_POS;
+            }
+        }
+        const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
+                codePoints, codePointCount, false /* forceLowerCaseSearch */);
+        if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) {
+            // Return the position if the word was found or no lower case search is wanted.
+            return wordPtNodePos;
+        }
+        // Check bigrams for lower-cased previous word if original was not found. Useful for
+        // auto-capitalized words like "The [current_word]".
+        return dictStructurePolicy->getTerminalPtNodePositionOfWord(
+                codePoints, codePointCount, true /* forceLowerCaseSearch */);
+    }
+
+ void clear() {
+ for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
+ mPrevWordCodePointCount[i] = 0;
+ mIsBeginningOfSentence[i] = false;
+ }
+ }
+
+ int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
+ int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+};
+} // namespace latinime
+#endif // LATINIME_PREV_WORDS_INFO_H
diff --git a/third_party/android_prediction/suggest/core/suggest.cpp b/third_party/android_prediction/suggest/core/suggest.cpp
new file mode 100644
index 0000000..0aa1c33
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/suggest.cpp
@@ -0,0 +1,435 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/core/suggest.h"
+
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_priority_queue.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_vector.h"
+#include "third_party/android_prediction/suggest/core/dictionary/dictionary.h"
+#include "third_party/android_prediction/suggest/core/dictionary/digraph_utils.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "third_party/android_prediction/suggest/core/policy/traversal.h"
+#include "third_party/android_prediction/suggest/core/policy/weighting.h"
+#include "third_party/android_prediction/suggest/core/result/suggestions_output_utils.h"
+#include "third_party/android_prediction/suggest/core/session/dic_traverse_session.h"
+
+namespace latinime {
+
+// Initialization of class constants.
+const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
+
+/**
+ * Computes suggestions for the given input touch points; the results are reported through
+ * outSuggestionResults. NOTE: the commitPoint argument (premature commit up to a given point for
+ * sentence-level suggestion) that this comment used to describe is absent from the signature.
+ *
+ * Note: Currently does not support concurrent calls across threads. Continuous suggestion is
+ * automatically activated for sequential calls that share the same starting input.
+ * TODO: Stop detecting continuous suggestion. Start using traverseSession instead.
+ */
+void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
+ int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints,
+ int inputSize, const float languageWeight,
+ SuggestionResults *const outSuggestionResults) const {
+ PROF_OPEN;
+ PROF_START(0);
+ const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance();
+ DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession);
+ tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times,
+ pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount());
+ // TODO: Add the way to evaluate cache
+
+ initializeSearch(tSession);
+ PROF_END(0);
+ PROF_START(1);
+
+ // keep expanding search dicNodes until all have terminated.
+ while (tSession->getDicTraverseCache()->activeSize() > 0) {
+ expandCurrentDicNodes(tSession);
+ tSession->getDicTraverseCache()->advanceActiveDicNodes();
+ tSession->getDicTraverseCache()->advanceInputIndex(inputSize);
+ }
+ PROF_END(1);
+ PROF_START(2);
+ SuggestionsOutputUtils::outputSuggestions(
+ SCORING, tSession, languageWeight, outSuggestionResults);
+ PROF_END(2);
+ PROF_CLOSE;
+}
+
+/**
+ * Initializes the search at the root of the lexicon trie. Note that when possible the search will
+ * continue suggestion from where it left off during the last call.
+ */
+void Suggest::initializeSearch(DicTraverseSession *traverseSession) const {
+ if (!traverseSession->getProximityInfoState(0)->isUsed()) {
+ return;
+ }
+
+ if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE
+ && traverseSession->isContinuousSuggestionPossible()) {
+ // Continue suggestion
+ traverseSession->getDicTraverseCache()->continueSearch();
+ } else {
+ // Restart recognition at the root.
+ traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()),
+ TRAVERSAL->getTerminalCacheSize());
+ // Create a new dic node here
+ DicNode rootNode;
+ DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
+ traverseSession->getPrevWordsPtNodePos(), &rootNode);
+ traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
+ }
+}
+
+/**
+ * Expands the dicNodes in the current search priority queue by advancing to the possible child
+ * nodes based on the next touch point(s) (or no touch points for lookahead)
+ */
+void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
+ const int inputSize = traverseSession->getInputSize();
+ DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize());
+ DicNode correctionDicNode;
+
+ // TODO: Find more efficient caching
+ const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverseSession);
+ if (shouldDepthLevelCache) {
+ traverseSession->getDicTraverseCache()->updateLastCachedInputIndex();
+ }
+ if (DEBUG_CACHE) {
+ AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d",
+ shouldDepthLevelCache, inputSize);
+ }
+ while (traverseSession->getDicTraverseCache()->activeSize() > 0) {
+ DicNode dicNode;
+ traverseSession->getDicTraverseCache()->popActive(&dicNode);
+ if (dicNode.isTotalInputSizeExceedingLimit()) {
+ return;
+ }
+ childDicNodes.clear();
+ const int point0Index = dicNode.getInputIndex(0);
+ const bool canDoLookAheadCorrection =
+ TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode);
+ const bool isLookAheadCorrection = canDoLookAheadCorrection
+ && traverseSession->getDicTraverseCache()->
+ isLookAheadCorrectionInputIndex(static_cast<int>(point0Index));
+ const bool isCompletion = dicNode.isCompletion(inputSize);
+
+ const bool shouldNodeLevelCache =
+ TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode);
+ if (shouldDepthLevelCache || shouldNodeLevelCache) {
+ if (DEBUG_CACHE) {
+ dicNode.dump("PUSH_CACHE");
+ }
+ traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode);
+ dicNode.setCached();
+ }
+
+ if (dicNode.isInDigraph()) {
+ // Finish digraph handling if the node is in the middle of a digraph expansion.
+ processDicNodeAsDigraph(traverseSession, &dicNode);
+ } else if (isLookAheadCorrection) {
+ // The algorithm maintains a small set of "deferred" nodes that have not consumed the
+ // latest touch point yet. These are needed to apply look-ahead correction operations
+ // that require special handling of the latest touch point. For example, with insertions
+ // (e.g., "thiis" -> "this") the latest touch point should not be consumed at all.
+ processDicNodeAsTransposition(traverseSession, &dicNode);
+ processDicNodeAsInsertion(traverseSession, &dicNode);
+ } else { // !isLookAheadCorrection
+ // Only consider typing error corrections if the normalized compound distance is
+ // below a spatial distance threshold.
+ // NOTE: the threshold may need to be updated if scoring model changes.
+ // TODO: Remove. Do not prune node here.
+ const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode);
+ // Process for handling space substitution (e.g., hevis => he is)
+ if (allowsErrorCorrections
+ && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
+ createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */);
+ }
+
+ DicNodeUtils::getAllChildDicNodes(
+ &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);
+
+ const int childDicNodesSize = childDicNodes.getSizeAndLock();
+ for (int i = 0; i < childDicNodesSize; ++i) {
+ DicNode *const childDicNode = childDicNodes[i];
+ if (isCompletion) {
+ // Handle forward lookahead when the lexicon letter exceeds the input size.
+ processDicNodeAsMatch(traverseSession, childDicNode);
+ continue;
+ }
+ if (DigraphUtils::hasDigraphForCodePoint(
+ traverseSession->getDictionaryStructurePolicy()
+ ->getHeaderStructurePolicy(),
+ childDicNode->getNodeCodePoint())) {
+ correctionDicNode.initByCopy(childDicNode);
+ correctionDicNode.advanceDigraphIndex();
+ processDicNodeAsDigraph(traverseSession, &correctionDicNode);
+ }
+ if (TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNode,
+ allowsErrorCorrections)) {
+ // TODO: (Gesture) Change weight between omission and substitution errors
+ // TODO: (Gesture) Terminal node should not be handled as omission
+ correctionDicNode.initByCopy(childDicNode);
+ processDicNodeAsOmission(traverseSession, &correctionDicNode);
+ }
+ const ProximityType proximityType = TRAVERSAL->getProximityType(
+ traverseSession, &dicNode, childDicNode);
+ switch (proximityType) {
+ // TODO: Consider the difference of proximityType here
+ case MATCH_CHAR:
+ case PROXIMITY_CHAR:
+ processDicNodeAsMatch(traverseSession, childDicNode);
+ break;
+ case ADDITIONAL_PROXIMITY_CHAR:
+ if (allowsErrorCorrections) {
+ processDicNodeAsAdditionalProximityChar(traverseSession, &dicNode,
+ childDicNode);
+ }
+ break;
+ case SUBSTITUTION_CHAR:
+ if (allowsErrorCorrections) {
+ processDicNodeAsSubstitution(traverseSession, &dicNode, childDicNode);
+ }
+ break;
+ case UNRELATED_CHAR:
+ // Just drop this dicNode and do nothing.
+ break;
+ default:
+ // Just drop this dicNode and do nothing.
+ break;
+ }
+ }
+
+ // Push the dicNode for look-ahead correction
+ if (allowsErrorCorrections && canDoLookAheadCorrection) {
+ traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
+ }
+ }
+ }
+}
+
+void Suggest::processTerminalDicNode(
+ DicTraverseSession *traverseSession, DicNode *dicNode) const {
+ if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
+ return;
+ }
+ if (!dicNode->isTerminalDicNode()) {
+ return;
+ }
+ if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
+ return;
+ }
+ if (!dicNode->hasMatchedOrProximityCodePoints()) {
+ return;
+ }
+ // Create a non-cached node here.
+ DicNode terminalDicNode(*dicNode);
+ if (TRAVERSAL->needsToTraverseAllUserInput()
+ && dicNode->getInputIndex(0) < traverseSession->getInputSize()) {
+ Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL_INSERTION, traverseSession, 0,
+ &terminalDicNode, traverseSession->getMultiBigramMap());
+ }
+ Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSession, 0,
+ &terminalDicNode, traverseSession->getMultiBigramMap());
+ traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode);
+}
+
+/**
+ * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
+ * (by the space omission error correction) search path if input dicNode is on a terminal.
+ */
+void Suggest::processExpandedDicNode(
+        DicTraverseSession *traverseSession, DicNode *dicNode) const {
+    processTerminalDicNode(traverseSession, dicNode);  // Returns early if dicNode is not a terminal.
+    if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
+        if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) {
+            createNextWordDicNode(traverseSession, dicNode, false /* spaceSubstitution */);
+        }
+        const bool allowsLookAhead = !(dicNode->hasMultipleWords()  // false: multi-word completion
+                && dicNode->isCompletion(traverseSession->getInputSize()));
+        if (dicNode->hasChildren() && allowsLookAhead) {
+            traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode);
+        }
+    }
+}
+
+void Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession,
+ DicNode *childDicNode) const {
+ weightChildNode(traverseSession, childDicNode);
+ processExpandedDicNode(traverseSession, childDicNode);
+}
+
+void Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession,
+ DicNode *dicNode, DicNode *childDicNode) const {
+ // Note: Most types of corrections don't need to look up the bigram information since they do
+ // not treat the node as a terminal. There is no need to pass the bigram map in these cases.
+ Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY,
+ traverseSession, dicNode, childDicNode, 0 /* multiBigramMap */);
+ weightChildNode(traverseSession, childDicNode);
+ processExpandedDicNode(traverseSession, childDicNode);
+}
+
+void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
+ DicNode *dicNode, DicNode *childDicNode) const {
+ Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseSession,
+ dicNode, childDicNode, 0 /* multiBigramMap */); // no bigram map is passed
+ weightChildNode(traverseSession, childDicNode); // completion or match cost comes on top
+ processExpandedDicNode(traverseSession, childDicNode); // terminal handling, conditional enqueue
+}
+
+// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
+// u-umlaut are expanded to the transliteration "ue". Note that this happens in parallel with
+// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
+void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
+ DicNode *childDicNode) const {
+ weightChildNode(traverseSession, childDicNode); // completion or match cost
+ childDicNode->advanceDigraphIndex(); // advanced after weighting, before expansion
+ processExpandedDicNode(traverseSession, childDicNode);
+}
+
+/**
+ * Treats the current letter of dicNode as omitted from the input (e.g., ths => this): every child
+ * of dicNode is charged the omission cost and then weighted against the input. Expanding all of
+ * them would flood the search DicNodes cache with omission DicNodes, so only the children that
+ * the traversal policy accepts as plausible *next* letters after the omission are expanded.
+ * Note that apostrophes are handled as omissions.
+ */
+void Suggest::processDicNodeAsOmission(
+        DicTraverseSession *traverseSession, DicNode *dicNode) const {
+    DicNodeVector omissionCandidates;
+    DicNodeUtils::getAllChildDicNodes(
+            dicNode, traverseSession->getDictionaryStructurePolicy(), &omissionCandidates);
+
+    const int candidateCount = omissionCandidates.getSizeAndLock();
+    for (int index = 0; index < candidateCount; ++index) {
+        DicNode *const candidate = omissionCandidates[index];
+        // The costs are applied first; the plausibility check then sees the weighted child.
+        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession,
+                dicNode, candidate, 0 /* multiBigramMap */);
+        weightChildNode(traverseSession, candidate);
+        // Only plausible omissions are expanded any further.
+        if (TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, candidate)) {
+            processExpandedDicNode(traverseSession, candidate);
+        }
+    }
+}
+
+/**
+ * Handle the dicNode as an insertion error (e.g., thiis => this): the current touch point is
+ * skipped, and only children matching the primary code point of the next touch point are kept.
+ */
+void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession,
+        DicNode *dicNode) const {
+    const int16_t inputIndex = dicNode->getInputIndex(0);
+    DicNodeVector candidates;
+    DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
+            &candidates);
+    const int candidateCount = candidates.getSizeAndLock();
+    for (int index = 0; index < candidateCount; ++index) {
+        DicNode *const candidate = candidates[index];
+        if (traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(inputIndex + 1)
+                == candidate->getNodeCodePoint()) {
+            // Only the insertion cost is charged here; weightChildNode() is not called.
+            Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession,
+                    dicNode, candidate, 0 /* multiBigramMap */);
+            processExpandedDicNode(traverseSession, candidate);
+        }
+    }
+}
+
+/**
+ * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the next two touch points.
+ */
+void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
+        DicNode *dicNode) const {
+    const int16_t inputIndex = dicNode->getInputIndex(0);
+    DicNodeVector firstLevelNodes;
+    DicNodeVector secondLevelNodes;
+    DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
+            &firstLevelNodes);
+    const int firstLevelCount = firstLevelNodes.getSizeAndLock();
+    for (int i = 0; i < firstLevelCount; ++i) {
+        DicNode *const firstNode = firstLevelNodes[i];
+        // The first dictionary letter has to line up with the *second* of the two touch points.
+        const ProximityType firstMatch = traverseSession->getProximityInfoState(0)
+                ->getProximityType(inputIndex + 1, firstNode->getNodeCodePoint(),
+                        true /* checkProximityChars */);
+        if (!ProximityInfoUtils::isMatchOrProximityChar(firstMatch) || !firstNode->hasChildren()) {
+            continue;
+        }
+        secondLevelNodes.clear();
+        DicNodeUtils::getAllChildDicNodes(firstNode,
+                traverseSession->getDictionaryStructurePolicy(), &secondLevelNodes);
+        const int secondLevelCount = secondLevelNodes.getSizeAndLock();
+        for (int j = 0; j < secondLevelCount; ++j) {
+            DicNode *const secondNode = secondLevelNodes[j];
+            // ... and the letter after it with the *first* touch point.
+            const ProximityType secondMatch = traverseSession->getProximityInfoState(0)
+                    ->getProximityType(inputIndex, secondNode->getNodeCodePoint(),
+                            true /* checkProximityChars */);
+            if (ProximityInfoUtils::isMatchOrProximityChar(secondMatch)) {
+                Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION,
+                        traverseSession, firstNode, secondNode, 0 /* multiBigramMap */);
+                processExpandedDicNode(traverseSession, secondNode);
+            }
+        }
+    }
+}
+
+/**
+ * Weight child dicNode by aligning it to the key: the completion cost is charged when the node
+ * is a completion for the current input size, and the plain match cost otherwise.
+ */
+void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
+    // Both cases make the same call; only the correction type differs. No parent node and no
+    // bigram map are passed.
+    const int inputSize = traverseSession->getInputSize();
+    const CorrectionType correctionType =
+            dicNode->isCompletion(inputSize) ? CT_COMPLETION : CT_MATCH;
+    Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession,
+            0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
+}
+
+/**
+ * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also
+ * incorporates the unigram / bigram score for the ending word into the new dicNode.
+ */
+void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
+        const bool spaceSubstitution) const {
+    if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) {
+        return;
+    }
+
+    // Create a non-cached node here.
+    DicNode nextWordRoot;
+    DicNodeUtils::initAsRootWithPreviousWord(
+            traverseSession->getDictionaryStructurePolicy(), dicNode, &nextWordRoot);
+    Weighting::addCostAndForwardInputIndex(WEIGHTING,
+            spaceSubstitution ? CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION,
+            traverseSession, dicNode, &nextWordRoot, traverseSession->getMultiBigramMap());
+    // CAVEAT: This pruning is important for speed. Remove this when we can afford not to prune
+    // here because here is not the right place to do pruning. Pruning should take place only
+    // in DicNodePriorityQueue.
+    if (!(nextWordRoot.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING))) {
+        return;
+    }
+    // nextWordRoot is worth continuing to traverse.
+    traverseSession->getDicTraverseCache()->copyPushNextActive(&nextWordRoot);
+}
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/core/suggest.h b/third_party/android_prediction/suggest/core/suggest.h
new file mode 100644
index 0000000..b4615d6
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/suggest.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGEST_IMPL_H
+#define LATINIME_SUGGEST_IMPL_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/suggest_interface.h"
+#include "third_party/android_prediction/suggest/core/policy/suggest_policy.h"
+
+namespace latinime {
+
+// Naming convention
+// - Distance: "Weighted" edit distance -- used both for spatial and language.
+// - Compound Distance: Spatial Distance + Language Distance -- used for pruning and scoring
+// - Cost: delta/diff for Distance -- used both for spatial and language
+// - Length: "Non-weighted" -- used only for spatial
+// - Probability: "Non-weighted" -- used only for language
+// - Score: Final calibrated score based on the compound distance, which is sent to java as the
+// priority of a suggested word
+
+class DicNode;
+class DicTraverseSession;
+class ProximityInfo;
+class Scoring;
+class SuggestionResults;
+class Traversal;
+class Weighting;
+
+class Suggest : public SuggestInterface {
+ public:
+ AK_FORCE_INLINE Suggest(const SuggestPolicy *const suggestPolicy) // null policy => null helpers
+ : TRAVERSAL(suggestPolicy ? suggestPolicy->getTraversal() : nullptr),
+ SCORING(suggestPolicy ? suggestPolicy->getScoring() : nullptr),
+ WEIGHTING(suggestPolicy ? suggestPolicy->getWeighting() : nullptr) {}
+ AK_FORCE_INLINE virtual ~Suggest() {}
+ void getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
+ int *times, int *pointerIds, int *inputCodePoints, int inputSize,
+ const float languageWeight, SuggestionResults *const outSuggestionResults) const; // overrides
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Suggest);
+ void createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
+ const bool spaceSubstitution) const; // true: space substitution, false: space omission
+ void initializeSearch(DicTraverseSession *traverseSession) const;
+ void expandCurrentDicNodes(DicTraverseSession *traverseSession) const;
+ void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
+ void processExpandedDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
+ void weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
+ void processDicNodeAsOmission(DicTraverseSession *traverseSession, DicNode *dicNode) const;
+ void processDicNodeAsDigraph(DicTraverseSession *traverseSession, DicNode *dicNode) const;
+ void processDicNodeAsTransposition(DicTraverseSession *traverseSession,
+ DicNode *dicNode) const;
+ void processDicNodeAsInsertion(DicTraverseSession *traverseSession, DicNode *dicNode) const;
+ void processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession,
+ DicNode *dicNode, DicNode *childDicNode) const;
+ void processDicNodeAsSubstitution(DicTraverseSession *traverseSession, DicNode *dicNode,
+ DicNode *childDicNode) const;
+ void processDicNodeAsMatch(DicTraverseSession *traverseSession,
+ DicNode *childDicNode) const;
+
+ static const int MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE; // declared only; no value given here
+
+ const Traversal *const TRAVERSAL; // NOTE(review): may be null (see ctor); confirm users check
+ const Scoring *const SCORING; // from suggestPolicy->getScoring(), or null
+ const Weighting *const WEIGHTING; // from suggestPolicy->getWeighting(), or null
+};
+} // namespace latinime
+#endif // LATINIME_SUGGEST_IMPL_H
diff --git a/third_party/android_prediction/suggest/core/suggest_interface.h b/third_party/android_prediction/suggest/core/suggest_interface.h
new file mode 100644
index 0000000..b07f6bc
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/suggest_interface.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGEST_INTERFACE_H
+#define LATINIME_SUGGEST_INTERFACE_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class ProximityInfo;
+class SuggestionResults;
+
+class SuggestInterface { // abstract: getSuggestions() is pure virtual
+ public:
+ virtual void getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs,
+ int *inputYs, int *times, int *pointerIds, int *inputCodePoints, int inputSize,
+ const float languageWeight, SuggestionResults *const suggestionResults) const = 0;
+ SuggestInterface() {}
+ virtual ~SuggestInterface() {} // virtual, so deleting through a base pointer is safe
+ private:
+ DISALLOW_COPY_AND_ASSIGN(SuggestInterface);
+};
+} // namespace latinime
+#endif // LATINIME_SUGGEST_INTERFACE_H
diff --git a/third_party/android_prediction/suggest/core/suggest_options.h b/third_party/android_prediction/suggest/core/suggest_options.h
new file mode 100644
index 0000000..2c67ce4
--- /dev/null
+++ b/third_party/android_prediction/suggest/core/suggest_options.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGEST_OPTIONS_H
+#define LATINIME_SUGGEST_OPTIONS_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class SuggestOptions {
+ public:
+    // Wraps an array of `length` integer option values. Only the pointer is stored, so the
+    // array is borrowed, not copied.
+    SuggestOptions(const int *const options, const int length)
+            : mOptions(options), mLength(length) {}
+
+    AK_FORCE_INLINE bool isGesture() const {
+        return getBoolOption(IS_GESTURE);
+    }
+
+    AK_FORCE_INLINE bool useFullEditDistance() const {
+        return getBoolOption(USE_FULL_EDIT_DISTANCE);
+    }
+
+    AK_FORCE_INLINE bool blockOffensiveWords() const {
+        return getBoolOption(BLOCK_OFFENSIVE_WORDS);
+    }
+
+    AK_FORCE_INLINE bool enableSpaceAwareGesture() const {
+        return getBoolOption(SPACE_AWARE_GESTURE_ENABLED);
+    }
+
+    // `key` is relative to the first additional-feature slot.
+    AK_FORCE_INLINE bool getAdditionalFeaturesBoolOption(const int key) const {
+        return getBoolOption(key + ADDITIONAL_FEATURES_OPTIONS);
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(SuggestOptions);
+
+    // Need to update com.android.inputmethod.latin.NativeSuggestOptions when you add, remove or
+    // reorder options.
+    static const int IS_GESTURE = 0;
+    static const int USE_FULL_EDIT_DISTANCE = 1;
+    static const int BLOCK_OFFENSIVE_WORDS = 2;
+    static const int SPACE_AWARE_GESTURE_ENABLED = 3;
+    // Additional features options are stored after the other options and used as setting values of
+    // experimental features.
+    static const int ADDITIONAL_FEATURES_OPTIONS = 4;
+
+    const int *const mOptions;
+    const int mLength;
+
+    // True when `key` indexes a slot inside [0, mLength).
+    AK_FORCE_INLINE bool isValidKey(const int key) const {
+        return key >= 0 && key < mLength;
+    }
+
+    // Out-of-range keys read as false.
+    AK_FORCE_INLINE bool getBoolOption(const int key) const {
+        return isValidKey(key) && mOptions[key] != 0;
+    }
+
+    // Out-of-range keys read as 0.
+    AK_FORCE_INLINE int getIntOption(const int key) const {
+        return isValidKey(key) ? mOptions[key] : 0;
+    }
+};
+} // namespace latinime
+#endif // LATINIME_SUGGEST_OPTIONS_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.cpp
new file mode 100644
index 0000000..75133ac
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+
+#include <algorithm>
+
+namespace latinime {
+
+// Note that there are corresponding definitions on the Java side in DictionaryHeader.
+const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
+const char *const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
+ "REQUIRES_GERMAN_UMLAUT_PROCESSING";
+// TODO: Change attribute string to "IS_DECAYING_DICT".
+const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
+const char *const HeaderPolicy::DATE_KEY = "date";
+const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
+const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
+const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
+const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
+// Historical info is information that is needed to support decaying such as timestamp, level and
+// count.
+const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
+const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
+const char *const HeaderPolicy::FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY =
+ "FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP";
+const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
+ "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";
+const char *const HeaderPolicy::FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY =
+ "FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS";
+
+const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT";
+const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT";
+
+const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; // scale / rate => 1.0
+const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
+const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 2;
+const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3;
+// 30 days, expressed in seconds
+const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS =
+ 30 * 24 * 60 * 60;
+
+const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000;
+const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000;
+
+// Used for logging. Question mark is used to indicate that the key is not found.
+void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,
+        int outValueSize) const {
+    if (outValueSize <= 0) return;
+    if (outValueSize == 1) {
+        outValue[0] = '\0';
+        return;
+    }
+    std::vector<int> keyCodePoints;
+    HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePoints);
+    const auto found = mAttributeMap.find(keyCodePoints);
+    if (found == mAttributeMap.end()) {
+        // The key was not found.
+        outValue[0] = '?';
+        outValue[1] = '\0';
+        return;
+    }
+    // Copy as much of the value as fits while keeping one slot for the terminator.
+    const int copyLength = std::min(static_cast<int>(found->second.size()), outValueSize - 1);
+    for (int i = 0; i < copyLength; ++i) {
+        outValue[i] = found->second[i];
+    }
+    outValue[copyLength] = '\0';
+}
+
+const std::vector<int> HeaderPolicy::readLocale() const { // code points stored under LOCALE_KEY
+ return HeaderReadWriteUtils::readCodePointVectorAttributeValue(&mAttributeMap, LOCALE_KEY);
+}
+
+float HeaderPolicy::readMultipleWordCostMultiplier() const {
+    const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+            MULTIPLE_WORDS_DEMOTION_RATE_KEY, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE);
+    // A non-positive rate cannot serve as a divisor, so it maps to the weighting ceiling.
+    return (demotionRate > 0)
+            ? MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate)
+            : static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+}
+
+bool HeaderPolicy::readRequiresGermanUmlautProcessing() const {
+ return HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
+ REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false); // false is presumably the default value
+}
+
+bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
+        const int unigramCount, const int bigramCount,
+        const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const {
+    // Work on a copy so that the attributes held by this policy stay untouched.
+    DictionaryHeaderStructurePolicy::AttributeMap filledInAttributes(mAttributeMap);
+    fillInHeader(updatesLastDecayedTime, unigramCount, bigramCount,
+            extendedRegionSize, &filledInAttributes);
+    // Write position; handed by pointer to every write helper below.
+    int cursor = 0;
+    if (!HeaderReadWriteUtils::writeDictionaryVersion(outBuffer, mDictFormatVersion,
+            &cursor)) {
+        return false;
+    }
+    if (!HeaderReadWriteUtils::writeDictionaryFlags(outBuffer, mDictionaryFlags,
+            &cursor)) {
+        return false;
+    }
+    // Temporarily writes a dummy header size.
+    int headerSizeFieldPos = cursor;
+    if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, 0 /* size */,
+            &cursor)) {
+        return false;
+    }
+    if (!HeaderReadWriteUtils::writeHeaderAttributes(outBuffer, &filledInAttributes,
+            &cursor)) {
+        return false;
+    }
+    // Writes the actual header size at the position remembered above; the outcome of that
+    // write is the overall result.
+    return HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, cursor,
+            &headerSizeFieldPos);
+}
+
+void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const int unigramCount,
+ const int bigramCount, const int extendedRegionSize,
+ DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const {
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, unigramCount);
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, bigramCount);
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_KEY,
+ extendedRegionSize);
+ // Set the current time as the generation time.
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, DATE_KEY,
+ TimeKeeper::peekCurrentTime());
+ HeaderReadWriteUtils::setCodePointVectorAttribute(outAttributeMap, LOCALE_KEY, mLocale);
+ if (updatesLastDecayedTime) {
+ // Set the current time as the last decayed time.
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, LAST_DECAYED_TIME_KEY,
+ TimeKeeper::peekCurrentTime());
+ }
+}
+
+/* static */ DictionaryHeaderStructurePolicy::AttributeMap
+ HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *const dictBuf) {
+ DictionaryHeaderStructurePolicy::AttributeMap attributeMap; // starts out empty
+ HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap); // filled from dictBuf
+ return attributeMap; // handed back by value
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h b/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h
new file mode 100644
index 0000000..b7dc120
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h
@@ -0,0 +1,307 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_HEADER_POLICY_H
+#define LATINIME_HEADER_POLICY_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+#include "third_party/android_prediction/utils/time_keeper.h"
+
+namespace latinime {
+
+class HeaderPolicy : public DictionaryHeaderStructurePolicy {
+ public:
+ // Reads information from existing dictionary buffer.
+ HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion)
+ : mDictFormatVersion(formatVersion),
+ mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
+ mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
+ mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
+ mLocale(readLocale()),
+ mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
+ mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
+ mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
+ IS_DECAYING_DICT_KEY, false /* defaultValue */)),
+ mDate(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
+ mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ LAST_DECAYED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
+ mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ UNIGRAM_COUNT_KEY, 0 /* defaultValue */)),
+ mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ BIGRAM_COUNT_KEY, 0 /* defaultValue */)),
+ mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)),
+ mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
+ &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)),
+ mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY,
+ DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)),
+ mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY,
+ DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)),
+ mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY,
+ DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)),
+ mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
+ mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
+
+ // Constructs header information using an attribute map. Size and the counts start at 0.
+ HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
+ const std::vector<int> &locale,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap)
+ : mDictFormatVersion(dictFormatVersion),
+ mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
+ attributeMap)), mSize(0), mAttributeMap(*attributeMap), mLocale(locale), // map is copied
+ mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
+ mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
+ mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
+ IS_DECAYING_DICT_KEY, false /* defaultValue */)),
+ mDate(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
+ mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), // NOTE(review): DATE_KEY, not LAST_DECAYED_TIME_KEY as in the buffer constructor; confirm this is intended
+ mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0),
+ mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
+ &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)),
+ mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY,
+ DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)),
+ mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY,
+ DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)),
+ mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY,
+ DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)),
+ mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
+ mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
+
+ // Copy header information
+ HeaderPolicy(const HeaderPolicy *const headerPolicy)
+ : mDictFormatVersion(headerPolicy->mDictFormatVersion),
+ mDictionaryFlags(headerPolicy->mDictionaryFlags), mSize(headerPolicy->mSize),
+ mAttributeMap(headerPolicy->mAttributeMap), mLocale(headerPolicy->mLocale),
+ mMultiWordCostMultiplier(headerPolicy->mMultiWordCostMultiplier),
+ mRequiresGermanUmlautProcessing(headerPolicy->mRequiresGermanUmlautProcessing),
+ mIsDecayingDict(headerPolicy->mIsDecayingDict),
+ mDate(headerPolicy->mDate), mLastDecayedTime(headerPolicy->mLastDecayedTime),
+ mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount),
+ mExtendedRegionSize(headerPolicy->mExtendedRegionSize),
+ mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords),
+ mForgettingCurveOccurrencesToLevelUp(
+ headerPolicy->mForgettingCurveOccurrencesToLevelUp),
+ mForgettingCurveProbabilityValuesTableId(
+ headerPolicy->mForgettingCurveProbabilityValuesTableId),
+ mForgettingCurveDurationToLevelDown(
+ headerPolicy->mForgettingCurveDurationToLevelDown),
+ mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
+ mMaxBigramCount(headerPolicy->mMaxBigramCount) {}
+
+ // Temporary dummy header.
+ HeaderPolicy()
+ : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),
+ mAttributeMap(), mLocale(CharUtils::EMPTY_STRING), mMultiWordCostMultiplier(0.0f),
+ mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false),
+ mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
+ mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false),
+ mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0),
+ mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0) {}
+
+ ~HeaderPolicy() {}
+
+ virtual int getFormatVersionNumber() const {
+ // Conceptually this converts the symbolic value we use in the code into the
+ // hardcoded value of the bytes in the file. But we want the constants to be the
+ // same so we use them for both here.
+ switch (mDictFormatVersion) {
+ case FormatUtils::VERSION_2:
+ return FormatUtils::VERSION_2;
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
+ case FormatUtils::VERSION_4:
+ return FormatUtils::VERSION_4;
+ case FormatUtils::VERSION_4_DEV:
+ return FormatUtils::VERSION_4_DEV;
+ default: // any version not listed above is reported as unknown
+ return FormatUtils::UNKNOWN_VERSION;
+ }
+ }
+
+ AK_FORCE_INLINE bool isValid() const {
+     // Decaying dictionary must have historical information. That is the only constraint
+     // checked here, so every other combination of the two flags is accepted:
+     //   mIsDecayingDict  mHasHistoricalInfoOfWords  isValid()
+     //   false            false / true               true
+     //   true             true                       true
+     //   true             false                      false
+     const bool decaysWithoutHistory =
+             mIsDecayingDict && !mHasHistoricalInfoOfWords;
+     return !decaysWithoutHistory;
+ }
+
+ AK_FORCE_INLINE int getSize() const {
+ return mSize;
+ }
+
+ AK_FORCE_INLINE float getMultiWordCostMultiplier() const {
+ return mMultiWordCostMultiplier;
+ }
+
+ AK_FORCE_INLINE bool isDecayingDict() const {
+ return mIsDecayingDict;
+ }
+
+ AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
+ return mRequiresGermanUmlautProcessing;
+ }
+
+ AK_FORCE_INLINE int getDate() const {
+ return mDate;
+ }
+
+ AK_FORCE_INLINE int getLastDecayedTime() const {
+ return mLastDecayedTime;
+ }
+
+ AK_FORCE_INLINE int getUnigramCount() const {
+ return mUnigramCount;
+ }
+
+ AK_FORCE_INLINE int getBigramCount() const {
+ return mBigramCount;
+ }
+
+ AK_FORCE_INLINE int getExtendedRegionSize() const {
+ return mExtendedRegionSize;
+ }
+
+ AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const {
+ return mHasHistoricalInfoOfWords;
+ }
+
+ AK_FORCE_INLINE bool shouldBoostExactMatches() const {
+ // TODO: Investigate better ways to handle exact matches for personalized dictionaries.
+ return !isDecayingDict();
+ }
+
+ const DictionaryHeaderStructurePolicy::AttributeMap *getAttributeMap() const {
+ return &mAttributeMap;
+ }
+
+ AK_FORCE_INLINE int getForgettingCurveOccurrencesToLevelUp() const {
+ return mForgettingCurveOccurrencesToLevelUp;
+ }
+
+ AK_FORCE_INLINE int getForgettingCurveProbabilityValuesTableId() const {
+ return mForgettingCurveProbabilityValuesTableId;
+ }
+
+ AK_FORCE_INLINE int getForgettingCurveDurationToLevelDown() const {
+ return mForgettingCurveDurationToLevelDown;
+ }
+
+ AK_FORCE_INLINE int getMaxUnigramCount() const {
+ return mMaxUnigramCount;
+ }
+
+ AK_FORCE_INLINE int getMaxBigramCount() const {
+ return mMaxBigramCount;
+ }
+
+ void readHeaderValueOrQuestionMark(const char *const key,
+ int *outValue, int outValueSize) const;
+
+ bool fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
+ const int unigramCount, const int bigramCount,
+ const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const;
+
+ void fillInHeader(const bool updatesLastDecayedTime,
+ const int unigramCount, const int bigramCount, const int extendedRegionSize,
+ DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const;
+
+ AK_FORCE_INLINE const std::vector<int> *getLocale() const {
+ return &mLocale;
+ }
+
+ bool supportsBeginningOfSentence() const {
+ return mDictFormatVersion >= FormatUtils::VERSION_4;
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
+
+ static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
+ static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
+ static const char *const IS_DECAYING_DICT_KEY;
+ static const char *const DATE_KEY;
+ static const char *const LAST_DECAYED_TIME_KEY;
+ static const char *const UNIGRAM_COUNT_KEY;
+ static const char *const BIGRAM_COUNT_KEY;
+ static const char *const EXTENDED_REGION_SIZE_KEY;
+ static const char *const HAS_HISTORICAL_INFO_KEY;
+ static const char *const LOCALE_KEY;
+ static const char *const FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY;
+ static const char *const FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY;
+ static const char *const FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY;
+ static const char *const MAX_UNIGRAM_COUNT_KEY;
+ static const char *const MAX_BIGRAM_COUNT_KEY;
+ static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
+ static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
+ static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP;
+ static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
+ static const int DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS;
+ static const int DEFAULT_MAX_UNIGRAM_COUNT;
+ static const int DEFAULT_MAX_BIGRAM_COUNT;
+
+ const FormatUtils::FORMAT_VERSION mDictFormatVersion;
+ const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
+ const int mSize;
+ DictionaryHeaderStructurePolicy::AttributeMap mAttributeMap;
+ const std::vector<int> mLocale;
+ const float mMultiWordCostMultiplier;
+ const bool mRequiresGermanUmlautProcessing;
+ const bool mIsDecayingDict;
+ const int mDate;
+ const int mLastDecayedTime;
+ const int mUnigramCount;
+ const int mBigramCount;
+ const int mExtendedRegionSize;
+ const bool mHasHistoricalInfoOfWords;
+ const int mForgettingCurveOccurrencesToLevelUp;
+ const int mForgettingCurveProbabilityValuesTableId;
+ const int mForgettingCurveDurationToLevelDown;
+ const int mMaxUnigramCount;
+ const int mMaxBigramCount;
+
+ const std::vector<int> readLocale() const;
+ float readMultipleWordCostMultiplier() const;
+ bool readRequiresGermanUmlautProcessing() const;
+
+ static DictionaryHeaderStructurePolicy::AttributeMap createAttributeMapAndReadAllAttributes(
+ const uint8_t *const dictBuf);
+};
+} // namespace latinime
+#endif /* LATINIME_HEADER_POLICY_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
new file mode 100644
index 0000000..c242036
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+
+#include <cctype>
+#include <cstdio>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+// Size of a char buffer that can hold any 32-bit int printed in base 10: up to 10 digits, a
+// leading '-' for negative values, and the zero terminator ("-2147483648" is 11 characters
+// plus the terminator). One less would make snprintf() drop the last digit of negative
+// 10-digit values. This is used as the size of the string buffer when an int is stored as the
+// value of a header attribute, where a {key,value} string pair scheme is used.
+const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 12;
+
+const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256; // Key buffer size, in ints.
+const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256; // Value buffer size, in ints.
+
+const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4; // Bytes. First header field.
+const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; // Bytes. Follows the magic.
+const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2; // Bytes. Follows the version.
+const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4; // Bytes. Follows the flags.
+
+const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0; // No flag set.
+
+typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
+
+/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
+    // Header layout: see the comment in BinaryDictionaryFormatUtils::detectFormatVersion().
+    const int sizeFieldPos =
+            HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE;
+    return ByteArrayUtils::readUint32(dictBuf, sizeFieldPos);
+}
+
+/* static */ HeaderReadWriteUtils::DictionaryFlags
+        HeaderReadWriteUtils::getFlags(const uint8_t *const dictBuf) {
+    const int flagsPos = HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE;
+    return ByteArrayUtils::readUint16(dictBuf, flagsPos);  // Flags follow magic + version.
+}
+
+/* static */ HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
+ const AttributeMap *const attributeMap) {
+ return NO_FLAGS; // attributeMap is not consulted; no flag is ever set here.
+}
+
+/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
+        AttributeMap *const headerAttributes) {
+    // Reads consecutive {key, value} code point strings from the header options area, which
+    // runs from getHeaderOptionsPosition() up to the header size, into headerAttributes.
+    const int headerSize = getHeaderSize(dictBuf);
+    int readingPos = getHeaderOptionsPosition();
+    if (readingPos == NOT_A_DICT_POS) {
+        return;  // The header doesn't have header options.
+    }
+    int keyCodePoints[MAX_ATTRIBUTE_KEY_LENGTH];
+    int valueCodePoints[MAX_ATTRIBUTE_VALUE_LENGTH];
+    while (readingPos < headerSize) {
+        const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
+                MAX_ATTRIBUTE_KEY_LENGTH, keyCodePoints, &readingPos);
+        const std::vector<int> key(keyCodePoints, keyCodePoints + keyLength);
+        const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
+                MAX_ATTRIBUTE_VALUE_LENGTH, valueCodePoints, &readingPos);
+        const std::vector<int> value(valueCodePoints, valueCodePoints + valueLength);
+        // insert() leaves an already present key untouched, so the first occurrence wins.
+        headerAttributes->insert(AttributeMap::value_type(key, value));
+    }
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryVersion(
+        BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version,
+        int *const writingPos) {
+    // Writes the magic number followed by the version. The magic number is written before the
+    // version is validated, so *writingPos may have advanced even when false is returned.
+    const bool wroteMagicNumber = buffer->writeUintAndAdvancePosition(
+            FormatUtils::MAGIC_NUMBER, HEADER_MAGIC_NUMBER_SIZE, writingPos);
+    if (!wroteMagicNumber) {
+        return false;
+    }
+    // Only the version 4 variants can be written. Version 2 dictionary writing is not
+    // supported, and neither is any other version.
+    const bool isWritableVersion = version == FormatUtils::VERSION_4_ONLY_FOR_TESTING
+            || version == FormatUtils::VERSION_4 || version == FormatUtils::VERSION_4_DEV;
+    if (!isWritableVersion) {
+        return false;
+    }
+    return buffer->writeUintAndAdvancePosition(version /* data */,
+            HEADER_DICTIONARY_VERSION_SIZE, writingPos);
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryFlags(
+ BufferWithExtendableBuffer *const buffer, const DictionaryFlags flags,
+ int *const writingPos) { // Writes |flags| as a HEADER_FLAG_SIZE-byte field at *writingPos.
+ return buffer->writeUintAndAdvancePosition(flags, HEADER_FLAG_SIZE, writingPos);
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryHeaderSize( // Writes the size field.
+ BufferWithExtendableBuffer *const buffer, const int size, int *const writingPos) {
+ return buffer->writeUintAndAdvancePosition(size, HEADER_SIZE_FIELD_SIZE, writingPos);
+}
+
+/* static */ bool HeaderReadWriteUtils::writeHeaderAttributes(
+        BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes,
+        int *const writingPos) {
+    // Each attribute is written as a terminated key followed by a terminated value. Attributes
+    // with an empty key or an empty value are skipped. Stops and returns false on the first
+    // failed write.
+    for (const AttributeMap::value_type &attribute : *headerAttributes) {
+        const AttributeMap::key_type &key = attribute.first;
+        const AttributeMap::mapped_type &value = attribute.second;
+        if (key.empty() || value.empty()) {
+            continue;
+        }
+        const bool wroteKey = buffer->writeCodePointsAndAdvancePosition(&key.at(0), key.size(),
+                true /* writesTerminator */, writingPos);
+        if (!wroteKey || !buffer->writeCodePointsAndAdvancePosition(&value.at(0), value.size(),
+                true /* writesTerminator */, writingPos)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/* static */ void HeaderReadWriteUtils::setCodePointVectorAttribute(
+        AttributeMap *const headerAttributes, const char *const key, const std::vector<int> value) {
+    AttributeMap::key_type keyCodePoints;
+    insertCharactersIntoVector(key, &keyCodePoints);  // |key| as a code point vector.
+    (*headerAttributes)[keyCodePoints] = value;  // Replaces any value already stored.
+}
+
+/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes,
+        const char *const key, const bool value) {
+    setIntAttribute(headerAttributes, key, static_cast<int>(value));  // true -> 1, false -> 0.
+}
+
+/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes,
+        const char *const key, const int value) {
+    AttributeMap::key_type keyCodePoints;
+    insertCharactersIntoVector(key, &keyCodePoints);  // |key| as a code point vector.
+    setIntAttributeInner(headerAttributes, &keyCodePoints, value);
+}
+
+/* static */ void HeaderReadWriteUtils::setIntAttributeInner(AttributeMap *const headerAttributes,
+        const AttributeMap::key_type *const key, const int value) {
+    char decimalText[LARGEST_INT_DIGIT_COUNT];
+    snprintf(decimalText, sizeof(decimalText), "%d", value);  // Base-10 text of |value|.
+    AttributeMap::mapped_type valueCodePoints;
+    insertCharactersIntoVector(decimalText, &valueCodePoints);
+    (*headerAttributes)[*key] = valueCodePoints;  // Replaces any value already stored.
+}
+
+/* static */ const std::vector<int> HeaderReadWriteUtils::readCodePointVectorAttributeValue(
+        const AttributeMap *const headerAttributes, const char *const key) {
+    // Returns a copy of the value stored under |key|, or an empty vector when there is none.
+    AttributeMap::key_type keyCodePoints;
+    insertCharactersIntoVector(key, &keyCodePoints);
+    const AttributeMap::const_iterator found = headerAttributes->find(keyCodePoints);
+    if (found != headerAttributes->end()) {
+        return found->second;
+    }
+    return std::vector<int>();
+}
+
+/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue(
+        const AttributeMap *const headerAttributes, const char *const key,
+        const bool defaultValue) {
+    // Booleans are stored as ints: any non-zero stored value reads back as true.
+    const int fallback = static_cast<int>(defaultValue);
+    return readIntAttributeValue(headerAttributes, key, fallback) != 0;
+}
+
+/* static */ int HeaderReadWriteUtils::readIntAttributeValue(
+        const AttributeMap *const headerAttributes, const char *const key,
+        const int defaultValue) {
+    AttributeMap::key_type keyCodePoints;
+    insertCharactersIntoVector(key, &keyCodePoints);  // Look up by the key's code points.
+    return readIntAttributeValueInner(headerAttributes, &keyCodePoints, defaultValue);
+}
+
+/* static */ int HeaderReadWriteUtils::readIntAttributeValueInner(
+        const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
+        const int defaultValue) {
+    // Parses the value stored under |key| as a base-10 int with an optional leading '-'.
+    // Returns defaultValue when |key| is absent or any other character is not '0'..'9'.
+    const AttributeMap::const_iterator it = headerAttributes->find(*key);
+    if (it == headerAttributes->end()) {
+        return defaultValue;
+    }
+    int value = 0;
+    bool isNegative = false;
+    for (size_t i = 0; i < it->second.size(); ++i) {
+        const int codePoint = it->second.at(i);
+        if (i == 0 && codePoint == '-') {
+            isNegative = true;
+        } else if (codePoint < '0' || codePoint > '9') {
+            return defaultValue;  // Not isdigit(): it is undefined for non-char code points.
+        } else {
+            value = value * 10 + (codePoint - '0');
+        }
+    }
+    return isNegative ? -value : value;
+}
+
+/* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters,
+        std::vector<int> *const vector) {
+    for (const char *current = characters; *current; ++current) {
+        vector->push_back(*current);  // Appended after any existing elements.
+    }
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_read_write_utils.h
new file mode 100644
index 0000000..03cfc54
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_read_write_utils.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_HEADER_READ_WRITE_UTILS_H
+#define LATINIME_HEADER_READ_WRITE_UTILS_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class HeaderReadWriteUtils { // Static helpers to read and write binary dictionary headers.
+ public:
+ typedef uint16_t DictionaryFlags; // Value of the header "flags" field.
+
+ static int getHeaderSize(const uint8_t *const dictBuf); // Reads the header size field.
+
+ static DictionaryFlags getFlags(const uint8_t *const dictBuf); // Reads the flags field.
+
+ static AK_FORCE_INLINE int getHeaderOptionsPosition() { // Offset just past the fixed fields.
+ return HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE
+ + HEADER_SIZE_FIELD_SIZE;
+ }
+
+ static DictionaryFlags createAndGetDictionaryFlagsUsingAttributeMap(
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
+
+ static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, // Reads all attributes.
+ DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes);
+
+ static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer,
+ const FormatUtils::FORMAT_VERSION version, int *const writingPos);
+
+ static bool writeDictionaryFlags(BufferWithExtendableBuffer *const buffer,
+ const DictionaryFlags flags, int *const writingPos);
+
+ static bool writeDictionaryHeaderSize(BufferWithExtendableBuffer *const buffer,
+ const int size, int *const writingPos);
+
+ static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ int *const writingPos);
+
+ /**
+ * Methods for header attributes. Keys are C strings; ints and bools are stored as base-10 text.
+ */
+ static void setCodePointVectorAttribute(
+ DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const char *const key, const std::vector<int> value);
+
+ static void setBoolAttribute( // Stored as the int 1 or 0.
+ DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const char *const key, const bool value);
+
+ static void setIntAttribute(
+ DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const char *const key, const int value);
+
+ static const std::vector<int> readCodePointVectorAttributeValue( // Empty when absent.
+ const DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const char *const key);
+
+ static bool readBoolAttributeValue( // True when the stored int is non-zero.
+ const DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const char *const key, const bool defaultValue);
+
+ static int readIntAttributeValue( // defaultValue when absent or not a number.
+ const DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const char *const key, const int defaultValue);
+
+ static void insertCharactersIntoVector(const char *const characters, // Appends to |key|.
+ DictionaryHeaderStructurePolicy::AttributeMap::key_type *const key);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils);
+
+ static const int LARGEST_INT_DIGIT_COUNT;
+ static const int MAX_ATTRIBUTE_KEY_LENGTH;
+ static const int MAX_ATTRIBUTE_VALUE_LENGTH;
+
+ static const int HEADER_MAGIC_NUMBER_SIZE;
+ static const int HEADER_DICTIONARY_VERSION_SIZE;
+ static const int HEADER_FLAG_SIZE;
+ static const int HEADER_SIZE_FIELD_SIZE;
+
+ // Value for the "flags" field. It's unused at the moment.
+ static const DictionaryFlags NO_FLAGS;
+
+ static void setIntAttributeInner(
+ DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const DictionaryHeaderStructurePolicy::AttributeMap::key_type *const key,
+ const int value);
+
+ static int readIntAttributeValueInner(
+ const DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const DictionaryHeaderStructurePolicy::AttributeMap::key_type *const key,
+ const int defaultValue);
+};
+} // namespace latinime
+#endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt
new file mode 100644
index 0000000..9e29e83
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt
@@ -0,0 +1 @@
+Files under this directory have been auto generated.
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp
new file mode 100644
index 0000000..332d538
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp
@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
+ * Do not edit this file other than updating policy's interface.
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+
+#include "third_party/android_prediction/suggest/core/dictionary/property/bigram_property.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const bigramEntryPos) const { // out* params may be null.
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
+ if (outBigramPos) {
+ // Lookup target PtNode position.
+ *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
+ bigramEntry.getTargetTerminalId());
+ }
+ if (outProbability) {
+ if (bigramEntry.hasHistoricalInfo()) { // Decode the probability from historical info.
+ *outProbability =
+ ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
+ mHeaderPolicy);
+ } else { // Otherwise use the probability stored in the entry.
+ *outProbability = bigramEntry.getProbability();
+ }
+ }
+ if (outHasNext) {
+ *outHasNext = bigramEntry.hasNext();
+ }
+}
+
+bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
+ // 1. The word has no bigrams yet.
+ // 2. The word has bigrams, and there is the target in the list.
+ // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
+ // 4. The word has bigrams. We have to append new bigram entry to the list.
+ // 5. Same as 4, but the list is the last entry of the content file.
+ if (outAddedNewEntry) { // May be null. Stays false when an existing valid entry is updated.
+ *outAddedNewEntry = false;
+ }
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Case 1. PtNode that doesn't have a bigram list.
+ // Create new bigram list.
+ if (!mBigramDictContent->createNewBigramList(terminalId)) {
+ return false;
+ }
+ const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
+ bigramProperty);
+ // Write an entry.
+ const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
+ return false;
+ }
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ return true;
+ }
+
+ int tailEntryPos = NOT_A_DICT_POS;
+ const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
+ &tailEntryPos);
+ if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
+ // Case 4, 5. The list ends at the content tail, or it has no entry to overwrite or reuse.
+ // Add new entry to the bigram list.
+ if (tailEntryPos == NOT_A_DICT_POS) {
+ // Case 4. Create new bigram list.
+ if (!mBigramDictContent->createNewBigramList(terminalId)) {
+ return false;
+ }
+ const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ // Copy existing bigram list; tailEntryPos is presumably set to the tail of the copy.
+ if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
+ return false;
+ }
+ }
+ // Write new entry at the tail position of the bigram content.
+ const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
+ &newBigramEntry, bigramProperty);
+ if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
+ return false;
+ }
+ // Set the has next flag of the former tail entry so that the list reaches the new entry.
+ if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
+ return false;
+ }
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ return true;
+ }
+
+ // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
+ const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
+ if (!originalBigramEntry.isValid()) {
+ // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
+ // entry is updated.
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ }
+ const BigramEntry updatedBigramEntry =
+ originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
+ &updatedBigramEntry, bigramProperty);
+ return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
+}
+
+bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list doesn't exist.
+ return false;
+ }
+ const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
+ nullptr /* outTailEntryPos */);
+ if (entryPosToUpdate == NOT_A_DICT_POS) {
+ // Bigram entry doesn't exist.
+ return false;
+ }
+ const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
+ if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
+ // Only a reusable invalid entry was found, so the target bigram entry doesn't exist.
+ return false;
+ }
+ // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
+}
+
+bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+ int *const outBigramCount) { // *outBigramCount is only incremented; it must not be null.
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list doesn't exist.
+ return true;
+ }
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const int entryPos = readingPos;
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (!bigramEntry.isValid()) {
+ continue; // Already invalid entries are neither rewritten nor counted.
+ }
+ const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
+ bigramEntry.getTargetTerminalId());
+ if (targetPtNodePos == NOT_A_DICT_POS) {
+ // The target has no PtNode position: invalidate bigram entry.
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+ return false;
+ }
+ } else if (bigramEntry.hasHistoricalInfo()) {
+ const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+ bigramEntry.getHistoricalInfo(), mHeaderPolicy);
+ if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
+ const BigramEntry updatedBigramEntry =
+ bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+ return false;
+ }
+ *outBigramCount += 1;
+ } else {
+ // needsToKeep() rejected the entry: remove it by invalidating it.
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+ return false;
+ }
+ }
+ } else {
+ *outBigramCount += 1; // Entries without historical info are kept as they are.
+ }
+ }
+ return true;
+}
+
+int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list doesn't exist.
+ return 0;
+ }
+ int bigramCount = 0; // Number of valid entries found in the list.
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) { // Walks the list until an entry without the has next flag.
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (bigramEntry.isValid()) {
+ bigramCount++;
+ }
+ }
+ return bigramCount;
+}
+
+int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
+ const int bigramListPos, int *const outTailEntryPos) const { // outTailEntryPos may be null.
+ if (outTailEntryPos) {
+ *outTailEntryPos = NOT_A_DICT_POS;
+ }
+ bool hasNext = true;
+ int invalidEntryPos = NOT_A_DICT_POS; // Last invalid (reusable) entry seen so far.
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const int entryPos = readingPos;
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
+ // Entry with same target is found. *outTailEntryPos stays NOT_A_DICT_POS in this case.
+ return entryPos;
+ } else if (!bigramEntry.isValid()) {
+ // Invalid entry that can be reused is found.
+ invalidEntryPos = entryPos;
+ }
+ if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
+ if (outTailEntryPos) { // The list ends at the tail of the content.
+ *outTailEntryPos = entryPos;
+ }
+ }
+ }
+ return invalidEntryPos; // NOT_A_DICT_POS when there is no reusable entry either.
+}
+
+const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
+ const BigramEntry *const originalBigramEntry,
+ const BigramProperty *const bigramProperty) const {
+ // TODO: Consolidate historical info and probability.
+ if (mHeaderPolicy->hasHistoricalInfoOfWords()) { // Merge bigramProperty into historical info.
+ const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(),
+ bigramProperty->getLevel(), bigramProperty->getCount());
+ const HistoricalInfo updatedHistoricalInfo =
+ ForgettingCurveUtils::createUpdatedHistoricalInfo(
+ originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
+ &historicalInfoForUpdate, mHeaderPolicy);
+ return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
+ } else { // No historical info: take the probability from bigramProperty.
+ return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
+ }
+}
+
+bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
+ const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos); // Read,
+ const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext); // set,
+ return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos); // rewrite.
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
new file mode 100644
index 0000000..f9b024b
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
+ * Do not edit this file other than updating policy's interface.
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H
+#define LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+class BigramDictContent;
+} // namespace v402
+} // namespace backward
+class BigramProperty;
+namespace backward {
+namespace v402 {
+} // namespace v402
+} // namespace backward
+class HeaderPolicy;
+namespace backward {
+namespace v402 {
+class TerminalPositionLookupTable;
+
+class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
+ public:
+ Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
+ const TerminalPositionLookupTable *const terminalPositionLookupTable,
+ const HeaderPolicy *const headerPolicy)
+ : mBigramDictContent(bigramDictContent),
+ mTerminalPositionLookupTable(terminalPositionLookupTable),
+ mHeaderPolicy(headerPolicy) {} // Stores the given pointers; nothing is copied.
+
+ void getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const bigramEntryPos) const; // out* params may be null.
+
+ bool skipAllBigrams(int *const pos) const {
+ // Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
+ return true;
+ }
+
+ bool addNewEntry(const int terminalId, const int newTargetTerminalId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
+
+ bool removeEntry(const int terminalId, const int targetTerminalId); // False if not found.
+
+ bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+ int *const outBigramCount); // Adds the number of kept entries to *outBigramCount.
+
+ int getBigramEntryConut(const int terminalId); // NOTE(review): "Conut" reads as "Count".
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
+
+ int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
+ int *const outTailEntryPos) const; // outTailEntryPos may be null.
+
+ const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
+ const BigramProperty *const bigramProperty) const;
+
+ bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos);
+
+ BigramDictContent *const mBigramDictContent;
+ const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
+ const HeaderPolicy *const mHeaderPolicy;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
new file mode 100644
index 0000000..cce20a0
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Decodes the bigram entry stored at |*bigramEntryPos| and moves the position past every field
+// that was read. If the entry would not fit inside the content buffer, an error is logged and an
+// invalid entry (no next entry, NOT_A_PROBABILITY, NOT_A_TERMINAL_ID) is returned without moving
+// the position.
+const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
+        int *const bigramEntryPos) const {
+    const BufferWithExtendableBuffer *const buffer = getContentBuffer();
+    const int entryEndPos = (*bigramEntryPos) + getBigramEntrySize();
+    const bool isInRange = *bigramEntryPos >= 0 && entryEndPos <= buffer->getTailPosition();
+    if (!isInRange) {
+        AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
+                "bufSize: %d", *bigramEntryPos, entryEndPos, buffer->getTailPosition());
+        ASSERT(false);
+        return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+                Ver4DictConstants::NOT_A_TERMINAL_ID);
+    }
+    // Field order on disk: flags, then either the historical info triple or the probability,
+    // then the target terminal id.
+    const int flags = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
+    const bool hasNext = (flags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
+    int probability = NOT_A_PROBABILITY;
+    int timestamp = NOT_A_TIMESTAMP;
+    int level = 0;
+    int count = 0;
+    if (!mHasHistoricalInfo) {
+        probability = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
+    } else {
+        timestamp = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
+        level = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
+        count = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
+    }
+    // The stored "invalid target" marker is mapped back to NOT_A_TERMINAL_ID.
+    int targetTerminalId = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
+    if (targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) {
+        targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    }
+    if (!mHasHistoricalInfo) {
+        return BigramEntry(hasNext, probability, targetTerminalId);
+    }
+    const HistoricalInfo historicalInfo(timestamp, level, count);
+    return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId);
+}
+
+// Serializes |bigramEntryToWrite| at |*entryWritingPos| and moves the position past each field
+// written. Fields are written in this order: flags, then either timestamp/level/count (when this
+// content has historical info) or the probability, then the target terminal id. Returns false,
+// after logging, as soon as one field cannot be written.
+bool BigramDictContent::writeBigramEntryAndAdvancePosition(
+        const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
+    BufferWithExtendableBuffer *const buffer = getWritableContentBuffer();
+    const int flags = createAndGetBigramFlags(bigramEntryToWrite->hasNext());
+    if (!buffer->writeUintAndAdvancePosition(flags,
+            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
+        AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, flags);
+        return false;
+    }
+    if (!mHasHistoricalInfo) {
+        const int probability = bigramEntryToWrite->getProbability();
+        if (!buffer->writeUintAndAdvancePosition(probability,
+                Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
+                    probability);
+            return false;
+        }
+    } else {
+        const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
+        if (!buffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
+                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
+                    historicalInfo->getTimeStamp());
+            return false;
+        }
+        if (!buffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
+                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
+                    historicalInfo->getLevel());
+            return false;
+        }
+        if (!buffer->writeUintAndAdvancePosition(historicalInfo->getCount(),
+                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
+                    historicalInfo->getCount());
+            return false;
+        }
+    }
+    // NOT_A_TERMINAL_ID is stored as the dedicated "invalid target" marker.
+    const int targetTerminalId = bigramEntryToWrite->getTargetTerminalId();
+    int encodedTargetTerminalId = targetTerminalId;
+    if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+        encodedTargetTerminalId = Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID;
+    }
+    if (!buffer->writeUintAndAdvancePosition(encodedTargetTerminalId,
+            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
+        AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
+                *entryWritingPos, targetTerminalId);
+        return false;
+    }
+    return true;
+}
+
+// Copies the bigram list that starts at |bigramListPos| to |toPos|, entry by entry, until an
+// entry without the hasNext flag has been copied. |*outTailEntryPos| receives the position at
+// which that last entry is written. Returns false if an entry cannot be written.
+bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos,
+        int *const outTailEntryPos) {
+    int sourcePos = bigramListPos;
+    int destPos = toPos;
+    for (;;) {
+        const BigramEntry entry = getBigramEntryAndAdvancePosition(&sourcePos);
+        const bool isTailEntry = !entry.hasNext();
+        if (isTailEntry) {
+            // Record the position before the write below advances it.
+            *outTailEntryPos = destPos;
+        }
+        if (!writeBigramEntryAndAdvancePosition(&entry, &destPos)) {
+            AKLOGE("Cannot write bigram entry to copy. pos: %d", destPos);
+            return false;
+        }
+        if (isTailEntry) {
+            return true;
+        }
+    }
+}
+
+// Rebuilds this content from |originalBigramDictContent|. For every (old id, new id) pair in
+// |terminalIdMap| that has a bigram list in the original content, the list is copied with GC to
+// the tail of this content and, unless no entry survived, registered in the lookup table under
+// the new terminal id. |*outBigramEntryCount| is increased by the number of entries copied.
+bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const BigramDictContent *const originalBigramDictContent,
+        int *const outBigramEntryCount) {
+    typedef TerminalPositionLookupTable::TerminalIdMap::const_iterator TerminalIdMapIterator;
+    for (TerminalIdMapIterator it = terminalIdMap->begin(); it != terminalIdMap->end(); ++it) {
+        const int sourceListPos = originalBigramDictContent->getBigramListHeadPos(it->first);
+        if (sourceListPos == NOT_A_DICT_POS) {
+            // No bigram list for this terminal in the original content.
+            continue;
+        }
+        const int destListPos = getContentBuffer()->getTailPosition();
+        int copiedEntryCount = 0;
+        if (!runGCBigramList(sourceListPos, originalBigramDictContent, destListPos,
+                terminalIdMap, &copiedEntryCount)) {
+            AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
+                    sourceListPos, destListPos);
+            return false;
+        }
+        if (copiedEntryCount != 0) {
+            // At least one entry survived: count it and publish the new list position.
+            *outBigramEntryCount += copiedEntryCount;
+            if (!getUpdatableAddressLookupTable()->set(it->second, destListPos)) {
+                AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
+                        it->second, destListPos);
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+// Copies the bigram list starting at |bigramListPos| in |sourceBigramDictContent| to |toPos| in
+// this content. Entries whose target is NOT_A_TERMINAL_ID, or whose target terminal id is missing
+// from |terminalIdMap|, are dropped; the target ids of the remaining entries are rewritten
+// through the map. |*outEntryCount| is incremented once per entry written.
+// Returns whether GC for the bigram list succeeded.
+bool BigramDictContent::runGCBigramList(const int bigramListPos,
+        const BigramDictContent *const sourceBigramDictContent, const int toPos,
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        int *const outEntryCount) {
+    bool hasNext = true;
+    int readingPos = bigramListPos;
+    int writingPos = toPos;
+    // Position of the most recently written entry if it still carries the hasNext flag,
+    // NOT_A_DICT_POS otherwise.
+    int lastEntryPos = NOT_A_DICT_POS;
+    while (hasNext) {
+        const BigramEntry originalBigramEntry =
+                sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+        hasNext = originalBigramEntry.hasNext();
+        if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+            // Invalidated entry.
+            continue;
+        }
+        TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
+                terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
+        if (it == terminalIdMap->end()) {
+            // Target word has been removed.
+            continue;
+        }
+        lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
+        const BigramEntry updatedBigramEntry =
+                originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
+        if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
+            AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
+            return false;
+        }
+        *outEntryCount += 1;
+    }
+    if (lastEntryPos != NOT_A_DICT_POS) {
+        // Every entry after the last written one was dropped, so the last written entry still
+        // claims to have a successor. Clear its hasNext flag.
+        const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
+                false /* hasNext */);
+        if (!writeBigramEntry(&bigramEntry, lastEntryPos)) {
+            // Report the position that was actually being rewritten, not the list tail.
+            AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d",
+                    lastEntryPos);
+            return false;
+        }
+    }
+    return true;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h
new file mode 100644
index 0000000..2bdb740
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Bigram list content of a dictionary, layered on SparseTableDictContent: a lookup table maps a
+// terminal id to the head position of its bigram list inside the content buffer.
+class BigramDictContent : public SparseTableDictContent {
+ public:
+    // Opens the bigram content files that belong to |dictPath|.
+    BigramDictContent(const char *const dictPath, const bool hasHistoricalInfo,
+            const bool isUpdatable)
+            : SparseTableDictContent(dictPath,
+                      Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+              mHasHistoricalInfo(hasHistoricalInfo) {}
+
+    // Creates a content that is not backed by dictionary files.
+    BigramDictContent(const bool hasHistoricalInfo)
+            : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+              mHasHistoricalInfo(hasHistoricalInfo) {}
+
+    // Reads the entry at |bigramEntryPos| without exposing the advanced position.
+    const BigramEntry getBigramEntry(const int bigramEntryPos) const {
+        int pos = bigramEntryPos;
+        return getBigramEntryAndAdvancePosition(&pos);
+    }
+
+    const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
+
+    // Returns head position of bigram list for a PtNode specified by terminalId, or
+    // NOT_A_DICT_POS when the lookup table has no list for it.
+    int getBigramListHeadPos(const int terminalId) const {
+        const SparseTable *const lookupTable = getAddressLookupTable();
+        if (lookupTable->contains(terminalId)) {
+            return lookupTable->get(terminalId);
+        }
+        return NOT_A_DICT_POS;
+    }
+
+    // Appends |bigramEntryToWrite| at the current tail of the content buffer.
+    bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
+        int tailPos = getContentBuffer()->getTailPosition();
+        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &tailPos);
+    }
+
+    // Writes |bigramEntryToWrite| at |entryWritingPos|; the advanced position is discarded.
+    bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
+        int pos = entryWritingPos;
+        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &pos);
+    }
+
+    bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
+            int *const entryWritingPos);
+
+    // Registers the current buffer tail as the bigram list position of |terminalId|.
+    bool createNewBigramList(const int terminalId) {
+        const int tailPos = getContentBuffer()->getTailPosition();
+        return getUpdatableAddressLookupTable()->set(terminalId, tailPos);
+    }
+
+    bool copyBigramList(const int bigramListPos, const int toPos, int *const outTailEntryPos);
+
+    // Flushes the content using the bigram file extensions.
+    bool flushToFile(const char *const dictPath) const {
+        return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
+                Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
+                Ver4DictConstants::BIGRAM_FILE_EXTENSION);
+    }
+
+    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            const BigramDictContent *const originalBigramDictContent,
+            int *const outBigramEntryCount);
+
+    // Whether |pos| is exactly the tail position of the content buffer.
+    bool isContentTailPos(const int pos) const {
+        return getContentBuffer()->getTailPosition() == pos;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
+
+    // Builds the flags field of an entry; only the hasNext bit is ever set.
+    int createAndGetBigramFlags(const bool hasNext) const {
+        if (hasNext) {
+            return Ver4DictConstants::BIGRAM_HAS_NEXT_MASK;
+        }
+        return 0;
+    }
+
+    // Size in bytes of one serialized entry: flags and target terminal id are always present,
+    // the payload in between is either the historical info triple or the probability.
+    int getBigramEntrySize() const {
+        const int fixedSize = Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
+                + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+        if (!mHasHistoricalInfo) {
+            return fixedSize + Ver4DictConstants::PROBABILITY_SIZE;
+        }
+        return fixedSize
+                + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+                + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+                + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
+    }
+
+    bool runGCBigramList(const int bigramListPos,
+            const BigramDictContent *const sourceBigramDictContent, const int toPos,
+            const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            int *const outEntryCount);
+
+    bool mHasHistoricalInfo;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h
new file mode 100644
index 0000000..41225fe
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H
+#define LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/historical_info.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Immutable value describing one bigram entry: the hasNext flag, a probability, historical
+// information and the target terminal id. All members are const; the update*AndGetEntry()
+// helpers return a modified copy instead of changing the entry in place.
+class BigramEntry {
+ public:
+    // Copies every field. The historical information must be copied as well, otherwise a copy
+    // that the compiler does not elide silently loses timestamp/level/count.
+    BigramEntry(const BigramEntry& bigramEntry)
+            : mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
+              mHistoricalInfo(bigramEntry.mHistoricalInfo),
+              mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
+
+    // Entry without historical information.
+    BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
+            : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(),
+              mTargetTerminalId(targetTerminalId) {}
+
+    // Entry with historical information. |historicalInfo| is copied and must not be null.
+    BigramEntry(const bool hasNext, const int probability,
+            const HistoricalInfo *const historicalInfo, const int targetTerminalId)
+            : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo),
+              mTargetTerminalId(targetTerminalId) {}
+
+    // Returns a copy whose target terminal id is NOT_A_TERMINAL_ID, i.e. for which isValid()
+    // is false.
+    const BigramEntry getInvalidatedEntry() const {
+        return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
+    }
+
+    // Returns a copy with the hasNext flag replaced.
+    const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
+        return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId);
+    }
+
+    // Returns a copy with the target terminal id replaced.
+    const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
+        return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId);
+    }
+
+    // Returns a copy with the probability replaced.
+    const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
+        return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId);
+    }
+
+    // Returns a copy with the historical information replaced. |historicalInfo| must not be
+    // null.
+    const BigramEntry updateHistoricalInfoAndGetEntry(
+            const HistoricalInfo *const historicalInfo) const {
+        return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId);
+    }
+
+    // An entry is valid as long as it points to a real target terminal.
+    bool isValid() const {
+        return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
+    }
+
+    bool hasNext() const {
+        return mHasNext;
+    }
+
+    int getProbability() const {
+        return mProbability;
+    }
+
+    bool hasHistoricalInfo() const {
+        return mHistoricalInfo.isValid();
+    }
+
+    // The returned pointer refers to a member and is only valid while this entry is alive.
+    const HistoricalInfo *getHistoricalInfo() const {
+        return &mHistoricalInfo;
+    }
+
+    int getTargetTerminalId() const {
+        return mTargetTerminalId;
+    }
+
+ private:
+    // Copy constructor is public to use this class as a type of return value.
+    DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
+    DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
+
+    const bool mHasNext;
+    const int mProbability;
+    const HistoricalInfo mHistoricalInfo;
+    const int mTargetTerminalId;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h
new file mode 100644
index 0000000..a970fc0
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_DICT_CONTENT_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Abstract base class for dictionary content classes. It only declares the isValid() query;
+// subclasses provide the storage and the implementation.
+class DictContent {
+ public:
+ // Virtual so that a subclass can be destroyed through a DictContent pointer.
+ virtual ~DictContent() {}
+ // Pure virtual: each subclass defines what "valid" means for its own content.
+ virtual bool isValid() const = 0;
+
+ protected:
+ // Protected so that only subclasses can construct the base.
+ DictContent() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictContent);
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_DICT_CONTENT_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
new file mode 100644
index 0000000..0d398e8
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Reads the probability entry of |terminalId|. Ids outside [0, mSize) yield an entry with flags 0
+// and NOT_A_PROBABILITY instead of touching the buffer.
+const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
+    const bool isStoredTerminalId = terminalId >= 0 && terminalId < mSize;
+    if (!isStoredTerminalId) {
+        // This method can be called with invalid terminal id during GC.
+        return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY);
+    }
+    const BufferWithExtendableBuffer *const contentBuffer = getBuffer();
+    int readingPos = getEntryPos(terminalId);
+    const int flags = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &readingPos);
+    const int probability = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::PROBABILITY_SIZE, &readingPos);
+    if (!mHasHistoricalInfo) {
+        return ProbabilityEntry(flags, probability);
+    }
+    // Historical info follows the probability: timestamp, level, count.
+    const int timestamp = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &readingPos);
+    const int level = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &readingPos);
+    const int count = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &readingPos);
+    const HistoricalInfo historicalInfo(timestamp, level, count);
+    return ProbabilityEntry(flags, probability, &historicalInfo);
+}
+
+// Stores |probabilityEntry| in the slot of |terminalId|. Negative ids are rejected. When the id
+// is not covered by mSize yet, dummy entries are appended from the buffer tail up to and
+// including the target slot (growing mSize by one per dummy) before the real entry is written.
+bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
+        const ProbabilityEntry *const probabilityEntry) {
+    if (terminalId < 0) {
+        return false;
+    }
+    const int targetPos = getEntryPos(terminalId);
+    if (terminalId >= mSize) {
+        ProbabilityEntry paddingEntry;
+        // Fill the gap between the current tail and the target slot with dummy entries.
+        for (int paddingPos = getBuffer()->getTailPosition(); paddingPos <= targetPos;
+                paddingPos += getEntrySize()) {
+            if (!writeEntry(&paddingEntry, paddingPos)) {
+                AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", paddingPos, mSize);
+                return false;
+            }
+            mSize++;
+        }
+    }
+    return writeEntry(probabilityEntry, targetPos);
+}
+
+// Writes the content to the probability file of |dictPath|. If the buffer holds more bytes than
+// mSize entries need, only the first mSize entries are copied into a temporary content and that
+// copy is flushed; otherwise the buffer is flushed as is.
+bool ProbabilityDictContent::flushToFile(const char *const dictPath) const {
+    const bool hasBytesBeyondLastEntry = getEntryPos(mSize) < getBuffer()->getTailPosition();
+    if (!hasBytesBeyondLastEntry) {
+        return flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION);
+    }
+    ProbabilityDictContent trimmedContent(mHasHistoricalInfo);
+    for (int terminalId = 0; terminalId < mSize; ++terminalId) {
+        const ProbabilityEntry entry = getProbabilityEntry(terminalId);
+        if (!trimmedContent.setProbabilityEntry(terminalId, &entry)) {
+            AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", terminalId);
+            return false;
+        }
+    }
+    return trimmedContent.flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION);
+}
+
+// Rebuilds this content from |originalProbabilityDictContent|: for every (old id, new id) pair in
+// |terminalIdMap| the entry of the old id is read from the original content and stored under the
+// new id. mSize is reset first and ends up as the highest new id written plus one.
+// Returns false as soon as one entry cannot be stored.
+bool ProbabilityDictContent::runGC(
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const ProbabilityDictContent *const originalProbabilityDictContent) {
+    mSize = 0;
+    for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
+            it != terminalIdMap->end(); ++it) {
+        const ProbabilityEntry probabilityEntry =
+                originalProbabilityDictContent->getProbabilityEntry(it->first);
+        if (!setProbabilityEntry(it->second, &probabilityEntry)) {
+            AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second);
+            return false;
+        }
+        // setProbabilityEntry() already grows mSize for every entry it appends. Incrementing
+        // mSize here unconditionally would count those entries twice and let
+        // getProbabilityEntry() accept ids that lie beyond the buffer, so only make sure the slot
+        // that was just written is covered.
+        if (it->second >= mSize) {
+            mSize = it->second + 1;
+        }
+    }
+    return true;
+}
+
+// Size in bytes of one serialized entry: flags and probability are always stored, the
+// timestamp/level/count fields only when this content has historical info.
+int ProbabilityDictContent::getEntrySize() const {
+    const int baseSize = Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+            + Ver4DictConstants::PROBABILITY_SIZE;
+    if (!mHasHistoricalInfo) {
+        return baseSize;
+    }
+    return baseSize
+            + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+            + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+            + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
+}
+
+// Entries have a fixed size and are stored back to back, so the slot of |terminalId| starts at
+// terminalId times the entry size.
+int ProbabilityDictContent::getEntryPos(const int terminalId) const {
+    const int entrySize = getEntrySize();
+    return terminalId * entrySize;
+}
+
+// Serializes |probabilityEntry| starting at |entryPos|: flags, probability and, when this content
+// has historical info, timestamp, level and count. Returns false, after logging, as soon as one
+// field cannot be written.
+bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
+        const int entryPos) {
+    BufferWithExtendableBuffer *const buffer = getWritableBuffer();
+    int pos = entryPos;
+    if (!buffer->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
+            Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &pos)) {
+        AKLOGE("Cannot write flags in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!buffer->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
+            Ver4DictConstants::PROBABILITY_SIZE, &pos)) {
+        AKLOGE("Cannot write probability in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!mHasHistoricalInfo) {
+        // Nothing else is stored for entries without historical info.
+        return true;
+    }
+    const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
+    if (!buffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
+            Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &pos)) {
+        AKLOGE("Cannot write timestamp in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!buffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
+            Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &pos)) {
+        AKLOGE("Cannot write level in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!buffer->writeUintAndAdvancePosition(historicalInfo->getCount(),
+            Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &pos)) {
+        AKLOGE("Cannot write count in probability dict content. pos: %d", pos);
+        return false;
+    }
+    return true;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h
new file mode 100644
index 0000000..da2fb41
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+class ProbabilityEntry;
+
+// Probability content of a dictionary, layered on SingleDictContent. Entries have a fixed size
+// and are addressed by terminal id; mSize is the number of entry slots this object considers
+// stored.
+class ProbabilityDictContent : public SingleDictContent {
+ public:
+ // Opens the probability file that belongs to |dictPath|. mSize is derived from the buffer
+ // tail position divided by the entry size. getEntrySize() reads mHasHistoricalInfo, which is
+ // declared, and therefore initialized, before mSize.
+ ProbabilityDictContent(const char *const dictPath, const bool hasHistoricalInfo,
+ const bool isUpdatable)
+ : SingleDictContent(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable),
+ mHasHistoricalInfo(hasHistoricalInfo),
+ mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
+
+ // Creates an empty content (mSize is 0) that is not backed by a dictionary file.
+ ProbabilityDictContent(const bool hasHistoricalInfo)
+ : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}
+
+ // Returns the entry of |terminalId|, or an entry with NOT_A_PROBABILITY when the id is outside
+ // [0, mSize).
+ const ProbabilityEntry getProbabilityEntry(const int terminalId) const;
+
+ // Stores |probabilityEntry| for |terminalId|; returns false for negative ids and on write
+ // failures.
+ bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
+
+ // Writes the content to the probability file of |dictPath|.
+ bool flushToFile(const char *const dictPath) const;
+
+ // Rebuilds this content from |originalProbabilityDictContent|, storing each entry under the
+ // new terminal id that |terminalIdMap| assigns to its old id.
+ bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const ProbabilityDictContent *const originalProbabilityDictContent);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
+
+ // Size in bytes of one serialized entry; depends on mHasHistoricalInfo.
+ int getEntrySize() const;
+
+ // Byte position of the entry slot of |terminalId| (terminalId times the entry size).
+ int getEntryPos(const int terminalId) const;
+
+ // Serializes |probabilityEntry| at |entryPos|.
+ bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);
+
+ // Whether entries carry timestamp/level/count in addition to flags and probability.
+ bool mHasHistoricalInfo;
+ // Number of entry slots considered stored; see getProbabilityEntry() for the range check.
+ int mSize;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h
new file mode 100644
index 0000000..41d7662
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H
+#define LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/historical_info.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Immutable value object bundling the flags, the probability and the historical info of one
+// entry.
+class ProbabilityEntry {
+ public:
+    // Placeholder entry: zero flags, NOT_A_PROBABILITY, default-constructed historical info.
+    ProbabilityEntry()
+            : mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {}
+
+    // Entry with flags and probability only; historical info stays default-constructed.
+    ProbabilityEntry(const int flags, const int probability)
+            : mFlags(flags), mProbability(probability), mHistoricalInfo() {}
+
+    // Entry with historical info. *historicalInfo is copied, so the pointer must be non-null.
+    ProbabilityEntry(const int flags, const int probability,
+            const HistoricalInfo *const historicalInfo)
+            : mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {}
+
+    // Member-wise copy; public so that entries can be returned by value.
+    ProbabilityEntry(const ProbabilityEntry &other)
+            : mFlags(other.mFlags), mProbability(other.mProbability),
+              mHistoricalInfo(other.mHistoricalInfo) {}
+
+    // Returns a copy of this entry with its probability replaced by |probability|.
+    const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const {
+        return ProbabilityEntry(getFlags(), probability, getHistoricalInfo());
+    }
+
+    // Returns a copy of this entry with its historical info replaced by *historicalInfo.
+    const ProbabilityEntry createEntryWithUpdatedHistoricalInfo(
+            const HistoricalInfo *const historicalInfo) const {
+        return ProbabilityEntry(getFlags(), getProbability(), historicalInfo);
+    }
+
+    // True when the stored historical info reports itself as valid.
+    bool hasHistoricalInfo() const { return getHistoricalInfo()->isValid(); }
+
+    int getFlags() const { return mFlags; }
+
+    int getProbability() const { return mProbability; }
+
+    // Points at the member stored in this entry; valid only while the entry is alive.
+    const HistoricalInfo *getHistoricalInfo() const { return &mHistoricalInfo; }
+
+ private:
+    // Only assignment is disallowed; the copy constructor above stays usable.
+    DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
+
+    // All state is const: an entry never changes after construction.
+    const int mFlags;
+    const int mProbability;
+    const HistoricalInfo mHistoricalInfo;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp
new file mode 100644
index 0000000..80901a0
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Reads the shortcut entry at *shortcutEntryPos and moves *shortcutEntryPos past what was read.
+// Each out* pointer may be null, in which case that part of the entry is not reported.
+void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+        int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
+        bool *const outhasNext, int *const shortcutEntryPos) const {
+    const BufferWithExtendableBuffer *const buffer = getContentBuffer();
+    const int bufSize = buffer->getTailPosition();
+    if (*shortcutEntryPos < 0 || bufSize <= *shortcutEntryPos) {
+        AKLOGE("Invalid shortcut entry position. shortcutEntryPos: %d, bufSize: %d",
+                *shortcutEntryPos, bufSize);
+        ASSERT(false);
+        // Report an empty last entry so that callers looping over a list stop here.
+        if (outhasNext) { *outhasNext = false; }
+        if (outCodePointCount) { *outCodePointCount = 0; }
+        return;
+    }
+    const int flags = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+    if (outProbability) {
+        *outProbability = flags & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
+    }
+    if (outhasNext) {
+        *outhasNext = (flags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK) != 0;
+    }
+    // The code points are read only when both the output array and the count are supplied.
+    if (outCodePoint && outCodePointCount) {
+        buffer->readCodePointsAndAdvancePosition(
+                maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
+    }
+}
+
+// Looks up the head position of the shortcut list registered for terminalId.
+int ShortcutDictContent::getShortcutListHeadPos(const int terminalId) const {
+    const SparseTable *const lookupTable = getAddressLookupTable();
+    // Terminals that are absent from the lookup table have no shortcut list.
+    return lookupTable->contains(terminalId) ?
+            lookupTable->get(terminalId) : NOT_A_DICT_POS;
+}
+
+bool ShortcutDictContent::flushToFile(const char *const dictPath) const {
+ return flush(dictPath, Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,  // lookup table
+ Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,  // address table
+ Ver4DictConstants::SHORTCUT_FILE_EXTENSION);  // shortcut content
+}
+
+// Copies, from originalShortcutDictContent, the shortcut list of each terminal in terminalIdMap
+// (old id -> new id) to the tail of this content and registers it under the new id.
+bool ShortcutDictContent::runGC(
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const ShortcutDictContent *const originalShortcutDictContent) {
+    for (const auto &idPair : *terminalIdMap) {
+        const int oldTerminalId = idPair.first;
+        const int newTerminalId = idPair.second;
+        const int srcListPos = originalShortcutDictContent->getShortcutListHeadPos(oldTerminalId);
+        if (srcListPos == NOT_A_DICT_POS) {
+            continue;  // Nothing to copy for this terminal.
+        }
+        const int destListPos = getContentBuffer()->getTailPosition();
+        if (!copyShortcutListFromDictContent(srcListPos, originalShortcutDictContent,
+                destListPos)) {
+            AKLOGE("Cannot copy shortcut list during GC. original pos: %d, pos: %d",
+                    srcListPos, destListPos);
+            return false;
+        }
+        if (!getUpdatableAddressLookupTable()->set(newTerminalId, destListPos)) {
+            AKLOGE("Cannot set shortcut list position. terminal id: %d, pos: %d",
+                    newTerminalId, destListPos);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Registers the current tail of the content buffer as terminalId's shortcut list position.
+bool ShortcutDictContent::createNewShortcutList(const int terminalId) {
+    return getUpdatableAddressLookupTable()->set(terminalId, getContentBuffer()->getTailPosition());
+}
+
+bool ShortcutDictContent::copyShortcutList(const int shortcutListPos, const int toPos) {
+ return copyShortcutListFromDictContent(shortcutListPos, this, toPos);  // source is this content
+}
+
+// Copies a whole shortcut list, entry by entry, from sourceShortcutDictContent into this content.
+bool ShortcutDictContent::copyShortcutListFromDictContent(const int shortcutListPos,
+        const ShortcutDictContent *const sourceShortcutDictContent, const int toPos) {
+    int srcPos = shortcutListPos;
+    int destPos = toPos;
+    int points[MAX_WORD_LENGTH];
+    bool more = true;
+    do {
+        int probability = 0;
+        int count = 0;
+        sourceShortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH,
+                points, &count, &probability, &more, &srcPos);
+        if (!writeShortcutEntryAndAdvancePosition(points, count, probability, more, &destPos)) {
+            AKLOGE("Cannot write shortcut entry to copy. pos: %d", destPos);
+            return false;
+        }
+    } while (more);
+    return true;
+}
+
+// Rewrites the flags field at shortcutEntryPos with |probability|, keeping the has-next bit.
+bool ShortcutDictContent::setProbability(const int probability, const int shortcutEntryPos) {
+    const int flagsSize = Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE;
+    BufferWithExtendableBuffer *const buffer = getWritableContentBuffer();
+    const int oldFlags = buffer->readUint(flagsSize, shortcutEntryPos);
+    const bool hasNext = (oldFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK) != 0;
+    const int newFlags = createAndGetShortcutFlags(probability, hasNext);
+    return buffer->writeUint(newFlags, flagsSize, shortcutEntryPos);
+}
+
+// Writes one entry at *shortcutEntryPos: the flags field, then the terminated code points.
+bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint,
+        const int codePointCount, const int probability, const bool hasNext,
+        int *const shortcutEntryPos) {
+    const int flags = createAndGetShortcutFlags(probability, hasNext);
+    if (!getWritableContentBuffer()->writeUintAndAdvancePosition(flags,
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos)) {
+        AKLOGE("Cannot write shortcut flags. flags; %x, pos: %d", flags, *shortcutEntryPos);
+        return false;
+    }
+    const bool wroteCodePoints = getWritableContentBuffer()->writeCodePointsAndAdvancePosition(
+            codePoint, codePointCount, true /* writesTerminator */, shortcutEntryPos);
+    if (!wroteCodePoints) {
+        AKLOGE("Cannot write shortcut target code points. pos: %d", *shortcutEntryPos);
+    }
+    return wroteCodePoints;
+}
+
+// Scans the shortcut list that starts at shortcutListPos and returns the position of the first
+// entry whose code points equal targetCodePointsToFind; NOT_A_DICT_POS when there is none.
+int ShortcutDictContent::findShortcutEntryAndGetPos(const int shortcutListPos,
+        const int *const targetCodePointsToFind, const int codePointCount) const {
+    int nextPos = shortcutListPos;
+    int entryCodePoints[MAX_WORD_LENGTH];
+    bool moreEntries = true;
+    do {
+        const int entryPos = nextPos;  // Where the entry read below starts.
+        int probability = 0;
+        int entryLength = 0;
+        getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, entryCodePoints, &entryLength,
+                &probability, &moreEntries, &nextPos);
+        if (entryLength == codePointCount) {
+            // Same length: advance while the code points agree.
+            int index = 0;
+            while (index < codePointCount
+                    && targetCodePointsToFind[index] == entryCodePoints[index]) {
+                ++index;
+            }
+            if (index >= codePointCount) {
+                return entryPos;
+            }
+        }
+    } while (moreEntries);
+    // The last entry of the list was reached without a match.
+    return NOT_A_DICT_POS;
+}
+
+int ShortcutDictContent::createAndGetShortcutFlags(const int probability,
+        const bool hasNext) const {
+    const int nextBit = hasNext ? Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK : 0;  // has-next flag
+    return (probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK) | nextBit;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h
new file mode 100644
index 0000000..048019d
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+class ShortcutDictContent : public SparseTableDictContent {
+ public:
+    // Opens the shortcut lookup table, address table and content files that belong to dictPath.
+    ShortcutDictContent(const char *const dictPath, const bool isUpdatable)
+            : SparseTableDictContent(dictPath,
+                      Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+    // Creates empty content that is not backed by files.
+    ShortcutDictContent()
+            : SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+    // Same as getShortcutEntryAndAdvancePosition(), but takes the position by value. const,
+    void getShortcutEntry(const int maxCodePointCount, int *const outCodePoint,
+            int *const outCodePointCount, int *const outProbability, bool *const outhasNext,
+            const int shortcutEntryPos) const {  // ... because it only calls a const reader.
+        int readingPos = shortcutEntryPos;
+        getShortcutEntryAndAdvancePosition(maxCodePointCount, outCodePoint,
+                outCodePointCount, outProbability, outhasNext, &readingPos);
+    }
+
+    // Reads one entry at *shortcutEntryPos and advances it. The out* pointers may be null.
+    void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+            int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
+            bool *const outhasNext, int *const shortcutEntryPos) const;
+
+    // Returns head position of shortcut list for a PtNode specified by terminalId.
+    int getShortcutListHeadPos(const int terminalId) const;
+
+    bool flushToFile(const char *const dictPath) const;
+    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            const ShortcutDictContent *const originalShortcutDictContent);
+    bool createNewShortcutList(const int terminalId);
+    bool copyShortcutList(const int shortcutListPos, const int toPos);
+    bool setProbability(const int probability, const int shortcutEntryPos);
+
+    // Same as writeShortcutEntryAndAdvancePosition(), but takes the position by value.
+    bool writeShortcutEntry(const int *const codePoint, const int codePointCount,
+            const int probability, const bool hasNext, const int shortcutEntryPos) {
+        int writingPos = shortcutEntryPos;
+        return writeShortcutEntryAndAdvancePosition(codePoint, codePointCount, probability,
+                hasNext, &writingPos);
+    }
+
+    bool writeShortcutEntryAndAdvancePosition(const int *const codePoint,
+            const int codePointCount, const int probability, const bool hasNext,
+            int *const shortcutEntryPos);
+
+    int findShortcutEntryAndGetPos(const int shortcutListPos,
+            const int *const targetCodePointsToFind, const int codePointCount) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
+
+    bool copyShortcutListFromDictContent(const int shortcutListPos,
+            const ShortcutDictContent *const sourceShortcutDictContent, const int toPos);
+    int createAndGetShortcutFlags(const int probability, const bool hasNext) const;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
new file mode 100644
index 0000000..78166a0
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// DictContent stored in one BufferWithExtendableBuffer, optionally on top of an mmapped file.
+class SingleDictContent : public DictContent {
+ public:
+    // Opens contentFileName for dictPath. If openBuffer() yields no buffer, the content buffer
+    // wraps an empty view and isValid() returns false.
+    SingleDictContent(const char *const dictPath, const char *const contentFileName,
+            const bool isUpdatable)
+            : mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
+              mExpandableContentBuffer(
+                      mMmappedBuffer ? mMmappedBuffer->getReadWriteByteArrayView() :
+                              ReadWriteByteArrayView(),
+                      BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+              mIsValid(mMmappedBuffer) {}
+
+    // Content without a backing file; isValid() is always true.
+    SingleDictContent()
+            : mMmappedBuffer(nullptr),
+              mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mIsValid(true) {}
+
+    virtual ~SingleDictContent() {}
+
+    virtual bool isValid() const { return mIsValid; }
+
+    bool isNearSizeLimit() const { return mExpandableContentBuffer.isNearSizeLimit(); }
+
+ protected:
+    // Accessors for subclasses: mutable and read-only views of the same content buffer.
+    BufferWithExtendableBuffer *getWritableBuffer() { return &mExpandableContentBuffer; }
+
+    const BufferWithExtendableBuffer *getBuffer() const { return &mExpandableContentBuffer; }
+
+    // Flushes the content buffer through DictFileWritingUtils using dictPath and the suffix.
+    bool flush(const char *const dictPath, const char *const contentFileNameSuffix) const {
+        return DictFileWritingUtils::flushBufferToFileWithSuffix(
+                dictPath, contentFileNameSuffix, getBuffer());
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
+
+    // Order matters: members are initialized in declaration order and the content buffer is
+    // built from the mapped buffer's view.
+    const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
+    BufferWithExtendableBuffer mExpandableContentBuffer;
+    const bool mIsValid;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp
new file mode 100644
index 0000000..7ad2437
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Flushes the lookup table, the address table and the content buffer, in that order, through
+// DictFileWritingUtils::flushBufferToFileWithSuffix(). Stops at the first failure: the
+// remaining buffers are not flushed and nothing is rolled back here.
+bool SparseTableDictContent::flush(const char *const dictPath,
+        const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix,
+        const char *const contentFileNameSuffix) const {
+    const bool lookupTableFlushed = DictFileWritingUtils::flushBufferToFileWithSuffix(
+            dictPath, lookupTableFileNameSuffix, &mExpandableLookupTableBuffer);
+    // && short-circuits, so a buffer is flushed only if all previous flushes succeeded.
+    const bool addressTableFlushed = lookupTableFlushed
+            && DictFileWritingUtils::flushBufferToFileWithSuffix(
+                    dictPath, addressTableFileNameSuffix, &mExpandableAddressTableBuffer);
+    const bool contentFlushed = addressTableFlushed
+            && DictFileWritingUtils::flushBufferToFileWithSuffix(
+                    dictPath, contentFileNameSuffix, &mExpandableContentBuffer);
+    return contentFlushed;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
new file mode 100644
index 0000000..92c4420
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/sparse_table.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// TODO: Support multiple contents.
+// DictContent whose content buffer is addressed through a SparseTable built on a lookup table
+// buffer and an address table buffer.
+class SparseTableDictContent : public DictContent {
+ public:
+    // Opens the three files for dictPath. A file for which openBuffer() yields no buffer leaves
+    // its extendable buffer on an empty view and makes isValid() return false.
+    AK_FORCE_INLINE SparseTableDictContent(const char *const dictPath,
+            const char *const lookupTableFileName, const char *const addressTableFileName,
+            const char *const contentFileName, const bool isUpdatable,
+            const int sparseTableBlockSize, const int sparseTableDataSize)
+            : mLookupTableBuffer(
+                      MmappedBuffer::openBuffer(dictPath, lookupTableFileName, isUpdatable)),
+              mAddressTableBuffer(
+                      MmappedBuffer::openBuffer(dictPath, addressTableFileName, isUpdatable)),
+              mContentBuffer(
+                      MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
+              mExpandableLookupTableBuffer(
+                      mLookupTableBuffer ? mLookupTableBuffer->getReadWriteByteArrayView() :
+                              ReadWriteByteArrayView(),
+                      BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+              mExpandableAddressTableBuffer(
+                      mAddressTableBuffer ? mAddressTableBuffer->getReadWriteByteArrayView() :
+                              ReadWriteByteArrayView(),
+                      BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+              mExpandableContentBuffer(
+                      mContentBuffer ? mContentBuffer->getReadWriteByteArrayView() :
+                              ReadWriteByteArrayView(),
+                      BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+              mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+                      sparseTableBlockSize, sparseTableDataSize),
+              mIsValid(mLookupTableBuffer && mAddressTableBuffer && mContentBuffer) {}
+
+    // Content without backing files; isValid() is always true.
+    SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
+            : mLookupTableBuffer(), mAddressTableBuffer(), mContentBuffer(),
+              mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+              mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+              mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+              mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+                      sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {}
+
+    virtual ~SparseTableDictContent() {}
+
+    // False when openBuffer() yielded no buffer for at least one of the three files.
+    virtual bool isValid() const { return mIsValid; }
+
+    // True as soon as any of the three buffers reports being near its size limit.
+    bool isNearSizeLimit() const {
+        return mExpandableLookupTableBuffer.isNearSizeLimit()
+                || mExpandableAddressTableBuffer.isNearSizeLimit()
+                || mExpandableContentBuffer.isNearSizeLimit();
+    }
+
+ protected:
+    SparseTable *getUpdatableAddressLookupTable() { return &mAddressLookupTable; }
+
+    const SparseTable *getAddressLookupTable() const { return &mAddressLookupTable; }
+
+    BufferWithExtendableBuffer *getWritableContentBuffer() { return &mExpandableContentBuffer; }
+
+    const BufferWithExtendableBuffer *getContentBuffer() const { return &mExpandableContentBuffer; }
+
+    // Flushes the lookup table, address table and content buffers with the given file names.
+    bool flush(const char *const dictDirPath, const char *const lookupTableFileName,
+            const char *const addressTableFileName, const char *const contentFileName) const;
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
+
+    // Declaration order is initialization order: the mapped buffers come first, then the
+    // buffers wrapping their views, then the table that points at two of those buffers.
+    const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
+    const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
+    const MmappedBuffer::MmappedBufferPtr mContentBuffer;
+    BufferWithExtendableBuffer mExpandableLookupTableBuffer;
+    BufferWithExtendableBuffer mExpandableAddressTableBuffer;
+    BufferWithExtendableBuffer mExpandableContentBuffer;
+    SparseTable mAddressLookupTable;
+    const bool mIsValid;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp
new file mode 100644
index 0000000..681d847
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Returns the position stored for terminalId; NOT_A_DICT_POS if the id is out of range or unset.
+int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const {
+    if (terminalId < 0 || mSize <= terminalId) {
+        return NOT_A_DICT_POS;
+    }
+    const int storedPos = getBuffer()->readUint(
+            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
+    return (storedPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) ? NOT_A_DICT_POS : storedPos;
+}
+
+// Stores terminalPtNodePos for terminalId, appending unset entries first if the id is new.
+bool TerminalPositionLookupTable::setTerminalPtNodePosition(
+        const int terminalId, const int terminalPtNodePos) {
+    if (terminalId < 0) {
+        return false;  // Was NOT_A_DICT_POS: a position sentinel that reads as success here.
+    }
+    while (terminalId >= mSize) {
+        if (!getWritableBuffer()->writeUint(Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(mSize))) {
+            return false;
+        }
+        mSize++;
+    }
+    const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?
+            terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
+    return getWritableBuffer()->writeUint(terminalPos,
+            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
+}
+
+// Writes the table for dictPath with TERMINAL_ADDRESS_TABLE_FILE_EXTENSION. Only the mSize
+// entries in use are written, even when the underlying buffer is longer than that.
+bool TerminalPositionLookupTable::flushToFile(const char *const dictPath) const {
+    const bool bufferHasUnusedTail = getEntryPos(mSize) < getBuffer()->getTailPosition();
+    if (!bufferHasUnusedTail) {
+        // Nothing lies beyond the entries in use, so this table can be written as it is.
+        return flush(dictPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+    }
+    // The buffer is longer than the entries in use: rebuild a table that holds just those
+    // entries and write that one instead.
+    TerminalPositionLookupTable compactTable;
+    for (int terminalId = 0; terminalId < mSize; ++terminalId) {
+        const int ptNodePos = getTerminalPtNodePosition(terminalId);
+        if (!compactTable.setTerminalPtNodePosition(terminalId, ptNodePos)) {
+            AKLOGE("Cannot set terminal position to lookupTableToWrite."
+                    " terminalId: %d, position: %d", terminalId, ptNodePos);
+            return false;
+        }
+    }
+    return compactTable.flush(dictPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+}
+
+// Compacts the table in place: entries holding NOT_A_TERMINAL_ADDRESS are dropped, the others
+// are renumbered from 0, and each (old id -> new id) pair is recorded in terminalIdMap.
+bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminalIdMap) {
+    int nextNewTerminalId = 0;
+    for (int i = 0; i < mSize; ++i) {
+        const int terminalPos = getBuffer()->readUint(
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(i));
+        if (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
+            // This entry is garbage; it gets no new id.
+            continue;
+        }
+        // Give a new terminal id to the entry (nextNewTerminalId never exceeds i).
+        if (!getWritableBuffer()->writeUint(terminalPos,
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
+                getEntryPos(nextNewTerminalId))) {
+            return false;
+        }
+        // Memorize the mapping from the old terminal id to the new terminal id.
+        terminalIdMap->insert(TerminalIdMap::value_type(i, nextNewTerminalId));
+        nextNewTerminalId++;
+    }
+    mSize = nextNewTerminalId;
+    return true;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h
new file mode 100644
index 0000000..7cb401c
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H
+#define LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H
+
+#include <unordered_map>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+class TerminalPositionLookupTable : public SingleDictContent {  // Terminal id -> PtNode position.
+ public:
+ typedef std::unordered_map<int, int> TerminalIdMap;  // Old terminal id -> new terminal id.
+ // mSize is derived from the content buffer: tail position / size of one table entry.
+ TerminalPositionLookupTable(const char *const dictPath, const bool isUpdatable)
+ : SingleDictContent(dictPath,
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
+ mSize(getBuffer()->getTailPosition()
+ / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
+
+ TerminalPositionLookupTable() : mSize(0) {}  // Starts with no entries.
+
+ int getTerminalPtNodePosition(const int terminalId) const;
+
+ bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos);
+
+ int getNextTerminalId() const {  // Returns mSize, the current entry count.
+ return mSize;
+ }
+
+ bool flushToFile(const char *const dictPath) const;
+ // Drops garbage entries, renumbers the rest and fills terminalIdMap; false on write failure.
+ bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
+
+ int getEntryPos(const int terminalId) const {  // Buffer position of terminalId's entry.
+ return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+ }
+
+ int mSize;  // Number of entries in the table.
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif // LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h
new file mode 100644
index 0000000..98574d7
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
+ * Do not edit this file other than updating policy's interface.
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H
+#define LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {  // Backed by ShortcutDictContent.
+ public:
+ Ver4ShortcutListPolicy(ShortcutDictContent *const shortcutDictContent,
+ const TerminalPositionLookupTable *const terminalPositionLookupTable)
+ : mShortcutDictContent(shortcutDictContent) {}  // terminalPositionLookupTable is not used here.
+
+ ~Ver4ShortcutListPolicy() {}
+
+ int getStartPos(const int pos) const {
+ // The first shortcut entry is located at the head position of the shortcut list.
+ return pos;
+ }
+ // Reads one entry through getShortcutEntryAndAdvancePosition(); outIsWhitelist may be null.
+ void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const {
+ int probability = 0;
+ mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
+ outCodePoint, outCodePointCount, &probability, outHasNext, pos);
+ if (outIsWhitelist) {
+ *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(probability);
+ }
+ }
+
+ void skipAllShortcuts(int *const pos) const {
+ // Do nothing because we don't need to skip shortcut lists in ver4 dictionaries.
+ }
+ // Three cases: no list yet, list without this shortcut, or an existing entry to overwrite.
+ bool addNewShortcut(const int terminalId, const int *const codePoints, const int codePointCount,
+ const int probability) {
+ const int shortcutListPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+ if (shortcutListPos == NOT_A_DICT_POS) {
+ // Create shortcut list and write the shortcut as its only entry.
+ if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
+ AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
+ return false;
+ }
+ const int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+ return mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount, probability,
+ false /* hasNext */, writingPos);
+ }
+ const int entryPos = mShortcutDictContent->findShortcutEntryAndGetPos(shortcutListPos,
+ codePoints, codePointCount);
+ if (entryPos == NOT_A_DICT_POS) {
+ // Add new entry to the shortcut list.
+ // Create a new list, write the new entry at its head, then copy the old list after it.
+ if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
+ AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
+ return false;
+ }
+ int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+ if (!mShortcutDictContent->writeShortcutEntryAndAdvancePosition(codePoints,
+ codePointCount, probability, true /* hasNext */, &writingPos)) {
+ AKLOGE("Cannot write shortcut entry. terminal id: %d, pos: %d", terminalId,
+ writingPos);
+ return false;
+ }
+ return mShortcutDictContent->copyShortcutList(shortcutListPos, writingPos);
+ }
+ // Overwrite existing entry.
+ bool hasNext = false;  // Re-read the existing hasNext flag so the overwrite preserves it.
+ mShortcutDictContent->getShortcutEntry(MAX_WORD_LENGTH, 0 /* outCodePoint */,
+ 0 /* outCodePointCount */ , 0 /* probability */, &hasNext, entryPos);
+ if (!mShortcutDictContent->writeShortcutEntry(codePoints,
+ codePointCount, probability, hasNext, entryPos)) {
+ AKLOGE("Cannot overwrite shortcut entry. terminal id: %d, pos: %d", terminalId,
+ entryPos);
+ return false;
+ }
+ return true;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
+
+ ShortcutDictContent *const mShortcutDictContent;  // Not owned as far as this class shows.
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif // LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
new file mode 100644
index 0000000..fbaecf7
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+
+#include <cerrno>
+#include <cstring>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
+        const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer,
+        const FormatUtils::FORMAT_VERSION formatVersion) {  // Null result without a header.
+    if (headerBuffer) {
+        // TODO: take only dictDirPath, and open both header and trie files in the constructor below
+        const bool updatable = headerBuffer->isUpdatable();  // Read before the move below.
+        return Ver4DictBuffersPtr(
+                new Ver4DictBuffers(dictPath, std::move(headerBuffer), updatable, formatVersion));
+    }
+    ASSERT(false);  // A missing header buffer is a caller error.
+    AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
+    return Ver4DictBuffersPtr(nullptr);
+}
+
+// Writes headerBuffer and every dictionary buffer into a fresh temporary directory, then
+// replaces dictDirPath with it (remove + rename). Returns false, after logging, on any failure.
+bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
+        const BufferWithExtendableBuffer *const headerBuffer) const {
+    // Create temporary directory.
+    const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
+            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
+    char tmpDirPath[tmpDirPathBufSize];
+    FileUtils::getFilePathWithSuffix(dictDirPath,
+            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
+            tmpDirPath);
+    if (FileUtils::existsDir(tmpDirPath) && !FileUtils::removeDirAndFiles(tmpDirPath)) {
+        AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
+        ASSERT(false);
+        return false;
+    }
+    umask(S_IWGRP | S_IWOTH);  // NOTE: changes the process-wide mask and is not restored here.
+    if (mkdir(tmpDirPath, S_IRWXU) == -1) {
+        AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
+        return false;
+    }
+    // Get dictionary base path.
+    const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
+    char dictName[dictNameBufSize];
+    FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
+    const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
+    char dictPath[dictPathBufSize];
+    FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
+
+    // Write header file.
+    if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+            Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
+        AKLOGE("Dictionary header file %s%s cannot be written.", dictPath,
+                Ver4DictConstants::HEADER_FILE_EXTENSION);
+        return false;
+    }
+    // Write trie file.
+    if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+            Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) {
+        AKLOGE("Dictionary trie file %s%s cannot be written.", dictPath,
+                Ver4DictConstants::TRIE_FILE_EXTENSION);
+        return false;
+    }
+    // Write dictionary contents.
+    if (!mTerminalPositionLookupTable.flushToFile(dictPath)) {
+        AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    if (!mProbabilityDictContent.flushToFile(dictPath)) {
+        AKLOGE("Probability dict content cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    if (!mBigramDictContent.flushToFile(dictPath)) {
+        AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    if (!mShortcutDictContent.flushToFile(dictPath)) {
+        AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    // Remove existing dictionary.
+    if (!FileUtils::removeDirAndFiles(dictDirPath)) {
+        AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
+        ASSERT(false);
+        return false;
+    }
+    // Rename temporary directory.
+    if (rename(tmpDirPath, dictDirPath) != 0) {
+        AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
+        ASSERT(false);
+        return false;
+    }
+    return true;
+}
+
+Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
+ MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
+ const FormatUtils::FORMAT_VERSION formatVersion)  // Buffers backed by the files of dictPath.
+ : mHeaderBuffer(std::move(headerBuffer)),  // Dereferenced unchecked below; openVer4DictBuffers() rejects null first.
+ mDictBuffer(MmappedBuffer::openBuffer(dictPath,
+ Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
+ mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
+ mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableTrieBuffer(
+ mDictBuffer ? mDictBuffer->getReadWriteByteArrayView() :
+ ReadWriteByteArrayView(),  // Default-constructed view when mDictBuffer is null.
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mTerminalPositionLookupTable(dictPath, isUpdatable),
+ mProbabilityDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
+ mBigramDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
+ mShortcutDictContent(dictPath, isUpdatable),
+ mIsUpdatable(isUpdatable) {}
+
+Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
+ : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),  // No mmapped files.
+ mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
+ mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
+ mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
+ mIsUpdatable(true) {}  // Buffers created this way are always marked updatable.
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h
new file mode 100644
index 0000000..f9c83ba
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H
+#define LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H
+
+#include <memory>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+class Ver4DictBuffers {  // Bundles the header, trie and content buffers of a ver4 dictionary.
+ public:
+    typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr;
+    // Factory for file-backed buffers; takes ownership of headerBuffer.
+    static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
+            MmappedBuffer::MmappedBufferPtr headerBuffer,
+            const FormatUtils::FORMAT_VERSION formatVersion);
+    // Factory for buffers that are not backed by files.
+    static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
+            const HeaderPolicy *const headerPolicy, const int maxTrieSize) {
+        return Ver4DictBuffersPtr(new Ver4DictBuffers(headerPolicy, maxTrieSize));
+    }
+    // True only when both mmapped buffers exist and the header policy and all contents are valid.
+    AK_FORCE_INLINE bool isValid() const {
+        return mHeaderBuffer && mDictBuffer && mHeaderPolicy.isValid()
+                && mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid()
+                && mBigramDictContent.isValid() && mShortcutDictContent.isValid();
+    }
+    // True when the trie buffer or any content reports being near its size limit.
+    AK_FORCE_INLINE bool isNearSizeLimit() const {
+        return mExpandableTrieBuffer.isNearSizeLimit()
+                || mTerminalPositionLookupTable.isNearSizeLimit()
+                || mProbabilityDictContent.isNearSizeLimit()
+                || mBigramDictContent.isNearSizeLimit()
+                || mShortcutDictContent.isNearSizeLimit();
+    }
+
+    AK_FORCE_INLINE const HeaderPolicy *getHeaderPolicy() const {
+        return &mHeaderPolicy;
+    }
+
+    AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
+        return &mExpandableHeaderBuffer;
+    }
+
+    AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() {
+        return &mExpandableTrieBuffer;
+    }
+
+    AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const {
+        return &mExpandableTrieBuffer;
+    }
+
+    AK_FORCE_INLINE TerminalPositionLookupTable *getMutableTerminalPositionLookupTable() {
+        return &mTerminalPositionLookupTable;
+    }
+
+    AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
+        return &mTerminalPositionLookupTable;
+    }
+
+    AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
+        return &mProbabilityDictContent;
+    }
+
+    AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
+        return &mProbabilityDictContent;
+    }
+
+    AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
+        return &mBigramDictContent;
+    }
+
+    AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
+        return &mBigramDictContent;
+    }
+
+    AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
+        return &mShortcutDictContent;
+    }
+
+    AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
+        return &mShortcutDictContent;
+    }
+
+    AK_FORCE_INLINE bool isUpdatable() const {
+        return mIsUpdatable;
+    }
+    // Writes the dictionary to dictDirPath using this object's own header buffer.
+    bool flush(const char *const dictDirPath) const {
+        return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer);
+    }
+    // Same as flush(), but writes the supplied headerBuffer as the header file.
+    bool flushHeaderAndDictBuffers(const char *const dictDirPath,
+            const BufferWithExtendableBuffer *const headerBuffer) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
+    // headerBuffer is taken by value and moved into mHeaderBuffer by the definition.
+    Ver4DictBuffers(const char *const dictDirPath,
+            MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
+            const FormatUtils::FORMAT_VERSION formatVersion);
+
+    Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
+    // Declaration order matters: the constructors initialize later members from earlier ones.
+    const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
+    const MmappedBuffer::MmappedBufferPtr mDictBuffer;
+    const HeaderPolicy mHeaderPolicy;
+    BufferWithExtendableBuffer mExpandableHeaderBuffer;
+    BufferWithExtendableBuffer mExpandableTrieBuffer;
+    TerminalPositionLookupTable mTerminalPositionLookupTable;
+    ProbabilityDictContent mProbabilityDictContent;
+    BigramDictContent mBigramDictContent;
+    ShortcutDictContent mShortcutDictContent;
+    const bool mIsUpdatable;  // Boolean flag: initialized from bool, returned by isUpdatable().
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp
new file mode 100644
index 0000000..a2281dd
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// These values MUST match the definitions in FormatSpec.java.
+const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
+const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
+const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
+// tat = Terminal Address Table
+const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
+const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq";
+const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
+const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq";
+const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut";
+const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup";
+const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
+ ".shortcut_index_shortcut";
+
+// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
+const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
+// The extended region (the not-yet-GCed region in the dict file plus the additional buffer) is
+// limited to 1MB to prevent inefficient traversing.
+const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
+
+const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
+const int Ver4DictConstants::PROBABILITY_SIZE = 1;
+const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
+const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;  // Size of one table entry.
+const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;  // Entries holding this are GC garbage.
+const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
+const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
+const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
+const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
+
+const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
+// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing
+// invalid terminal ID in bigram lists.
+const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
+ (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
+const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
+const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
+const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
+const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h
new file mode 100644
index 0000000..32e8710
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H
+#define LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// TODO: Create PtConstants under the pt_common and move some constant values there.
+// Note that there are corresponding definitions in FormatSpec.java.
+class Ver4DictConstants {  // Constants of the ver4 (v402) format; values are in the .cpp file.
+ public:
+ static const char *const TRIE_FILE_EXTENSION;
+ static const char *const HEADER_FILE_EXTENSION;
+ static const char *const FREQ_FILE_EXTENSION;
+ static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_FILE_EXTENSION;
+ static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_FILE_EXTENSION;
+ static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
+ // Size limits.
+ static const int MAX_DICTIONARY_SIZE;
+ static const int MAX_DICT_EXTENDED_REGION_SIZE;
+ // Field sizes and sentinel values.
+ static const int NOT_A_TERMINAL_ID;
+ static const int PROBABILITY_SIZE;
+ static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
+ static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+ static const int NOT_A_TERMINAL_ADDRESS;
+ static const int TERMINAL_ID_FIELD_SIZE;
+ static const int TIME_STAMP_FIELD_SIZE;
+ static const int WORD_LEVEL_FIELD_SIZE;
+ static const int WORD_COUNT_FIELD_SIZE;
+ // Address table parameters for the bigram and shortcut contents.
+ static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
+ static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
+ static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
+ static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
+ // Bigram entry fields and masks.
+ static const int BIGRAM_FLAGS_FIELD_SIZE;
+ static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+ static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
+ static const int BIGRAM_PROBABILITY_MASK;
+ static const int BIGRAM_HAS_NEXT_MASK;
+ // Used when bigram list has time stamp.
+ static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
+ // Shortcut entry fields and masks.
+ static const int SHORTCUT_FLAGS_FIELD_SIZE;
+ static const int SHORTCUT_PROBABILITY_MASK;
+ static const int SHORTCUT_HAS_NEXT_MASK;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
new file mode 100644
index 0000000..6e9be0a
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+        const int ptNodePos, const int siblingNodePos) const {
+    // Decodes the PtNode stored at ptNodePos and returns its parameters. An out-of-range
+    // position yields a default-constructed PtNodeParams. A node flagged as moved is resolved
+    // by reading again at its destination, keeping the sibling position that was determined
+    // for the node read first.
+    const bool isValidPos = ptNodePos >= 0 && ptNodePos < mBuffer->getTailPosition();
+    if (!isValidPos) {
+        // Reading invalid position because of bug or broken dictionary.
+        AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+                ptNodePos, mBuffer->getTailPosition());
+        ASSERT(false);
+        return PtNodeParams();
+    }
+
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
+    const uint8_t *const nodeBuf = mBuffer->getBuffer(inAdditionalBuffer);
+    // Reads from the additional buffer use positions relative to that buffer; this offset
+    // converts between those and positions in the whole dictionary.
+    const int posOffset = inAdditionalBuffer ? mBuffer->getOriginalBufferSize() : 0;
+    int readingPos = ptNodePos - posOffset;
+
+    const PatriciaTrieReadingUtils::NodeFlags flags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(nodeBuf, &readingPos);
+    const int parentPosOffset =
+            DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+                    nodeBuf, &readingPos);
+    const int parentPos = DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, ptNodePos);
+    int codePoints[MAX_WORD_LENGTH];
+    const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+            nodeBuf, flags, MAX_WORD_LENGTH, codePoints, &readingPos);
+
+    int terminalIdFieldPos = NOT_A_DICT_POS;
+    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    int probability = NOT_A_PROBABILITY;
+    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+        terminalIdFieldPos = readingPos + posOffset;
+        terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
+                nodeBuf, &readingPos);
+        const ProbabilityEntry probabilityEntry =
+                mProbabilityDictContent->getProbabilityEntry(terminalId);
+        if (probabilityEntry.hasHistoricalInfo()) {
+            probability = ForgettingCurveUtils::decodeProbability(
+                    probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
+        } else {
+            probability = probabilityEntry.getProbability();
+        }
+    }
+
+    const int childrenPosFieldPos = readingPos + posOffset;
+    int childrenPos = DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
+            nodeBuf, &readingPos);
+    if (childrenPos != NOT_A_DICT_POS) {
+        childrenPos += posOffset;
+    }
+
+    // Sibling position is the tail position of original PtNode.
+    const int tailPos = readingPos + posOffset;
+    const int newSiblingNodePos = (siblingNodePos == NOT_A_DICT_POS) ? tailPos : siblingNodePos;
+    if (DynamicPtReadingUtils::isMoved(flags)) {
+        // Read destination node if the read node is a moved node. The destination position is
+        // stored at the same place as the parent position.
+        return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
+    }
+    return PtNodeParams(ptNodePos, flags, parentPos, codePointCount, codePoints,
+            terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
+            newSiblingNodePos);
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h
new file mode 100644
index 0000000..8f06a43
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+} // namespace v402
+} // namespace backward
+class BufferWithExtendableBuffer;
+namespace backward {
+namespace v402 {
+} // namespace v402
+} // namespace backward
+class HeaderPolicy;
+namespace backward {
+namespace v402 {
+class ProbabilityDictContent;
+
+/*
+ * This class helps to read PtNodes of a ver4 patricia trie. It follows moved nodes and reads
+ * node attributes, including the probability taken from the ProbabilityDictContent.
+ */
+class Ver4PatriciaTrieNodeReader : public PtNodeReader {
+ public:
+ Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+ const ProbabilityDictContent *const probabilityDictContent,
+ const HeaderPolicy *const headerPolicy)
+ : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent),
+ mHeaderPolicy(headerPolicy) {}  // Only stores the three pointers.
+
+ ~Ver4PatriciaTrieNodeReader() {}
+
+ virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const {
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
+ NOT_A_DICT_POS /* siblingNodePos */);  // No sibling position is supplied here.
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
+
+ const BufferWithExtendableBuffer *const mBuffer;
+ const ProbabilityDictContent *const mProbabilityDictContent;  // Queried by terminal id.
+ const HeaderPolicy *const mHeaderPolicy;  // Handed to ForgettingCurveUtils::decodeProbability().
+
+ const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+ const int siblingNodePos) const;  // Re-reads at the destination when the node is moved.
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
new file mode 100644
index 0000000..d7bae8b
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -0,0 +1,430 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
+
+#include "third_party/android_prediction/suggest/core/dictionary/property/unigram_property.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3; // For createAndGetFlags().
+
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
+        const PtNodeParams *const toBeUpdatedPtNodeParams) {
+    // Flags are read through the raw buffer but written through mTrieBuffer.
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+    const bool inAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(headPos);
+    const uint8_t *const trieBytes = mTrieBuffer->getBuffer(inAdditionalBuffer);
+    // A position in the additional buffer is rebased by the original buffer size before reading.
+    int readPos = inAdditionalBuffer ? headPos - mTrieBuffer->getOriginalBufferSize() : headPos;
+    // Fetch the current flags and derive the variant that has isDeleted set.
+    const PatriciaTrieReadingUtils::NodeFlags currentFlags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(trieBytes, &readPos);
+    const PatriciaTrieReadingUtils::NodeFlags deletedFlags =
+            DynamicPtReadingUtils::updateAndGetFlags(currentFlags, false /* isMoved */,
+                    true /* isDeleted */, false /* willBecomeNonTerminal */);
+    // Writing uses the unadjusted head position.
+    int writePos = headPos;
+    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, deletedFlags,
+            &writePos)) {
+        return false;
+    }
+    if (!toBeUpdatedPtNodeParams->isTerminal()) {
+        // Nothing is removed from the lookup table for a non-terminal PtNode.
+        return true;
+    }
+    // Terminal PtNode: reset its entry in the terminal position lookup table.
+    return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+            toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
+}
+
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
+ const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int movedPos, const int bigramLinkedNodePos) {
+ int pos = toBeUpdatedPtNodeParams->getHeadPos();
+ const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
+ const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ pos -= mTrieBuffer->getOriginalBufferSize();  // Rebase the position for the raw read below.
+ }
+ // Read the original flags from the selected buffer at the rebased position.
+ const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
+ false /* isDeleted */, false /* willBecomeNonTerminal */);
+ int writingPos = toBeUpdatedPtNodeParams->getHeadPos();  // Writes use the unadjusted head.
+ // Overwrite the flags at the head position with the isMoved variant.
+ if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
+ &writingPos)) {
+ return false;
+ }
+ // Update moved position, which is stored in the parent offset field.
+ if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
+ return false;
+ }
+ if (toBeUpdatedPtNodeParams->hasChildren()) {
+ // Rewrite the parent offset of every child so that it refers to bigramLinkedNodePos.
+ mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
+ while (!mReadingHelper.isEnd()) {
+ const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
+ int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
+ + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;  // Field follows the child's flags.
+ if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
+ &parentOffsetFieldPos)) {
+ // Parent offset cannot be written because of a bug or a broken dictionary; thus,
+ // we give up updating the dictionary.
+ return false;
+ }
+ mReadingHelper.readNextSiblingNode(childPtNodeParams);  // Move on to the next child.
+ }
+ }
+ return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
+        const PtNodeParams *const toBeUpdatedPtNodeParams) {
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+    const bool inAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(headPos);
+    const uint8_t *const trieBytes = mTrieBuffer->getBuffer(inAdditionalBuffer);
+    // A position in the additional buffer is rebased by the original buffer size before reading.
+    int readPos = inAdditionalBuffer ? headPos - mTrieBuffer->getOriginalBufferSize() : headPos;
+    const PatriciaTrieReadingUtils::NodeFlags currentFlags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(trieBytes, &readPos);
+    const PatriciaTrieReadingUtils::NodeFlags nonTerminalFlags =
+            DynamicPtReadingUtils::updateAndGetFlags(currentFlags, false /* isMoved */,
+                    false /* isDeleted */, true /* willBecomeNonTerminal */);
+    // Reset the lookup table entry first; the flags are written only when that succeeds.
+    const int terminalId = toBeUpdatedPtNodeParams->getTerminalId();
+    const bool tableUpdated = mBuffers->getMutableTerminalPositionLookupTable()
+            ->setTerminalPtNodePosition(terminalId, NOT_A_DICT_POS /* ptNodePos */);
+    if (!tableUpdated) {
+        AKLOGE("Cannot update terminal position lookup table. terminal id: %d", terminalId);
+        return false;
+    }
+    // Store the new flags at the PtNode head.
+    int writePos = headPos;
+    return DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nonTerminalFlags,
+            &writePos);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
+        const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const UnigramProperty *const unigramProperty) {
+    // TODO: Update other information in the unigram property.
+    // Only the probability entry (probability / historical info) of a terminal is rewritten.
+    if (!toBeUpdatedPtNodeParams->isTerminal()) {
+        return false;
+    }
+    // Read the stored entry, derive the updated one from it, and write that back.
+    const int terminalId = toBeUpdatedPtNodeParams->getTerminalId();
+    const ProbabilityEntry storedEntry =
+            mBuffers->getProbabilityDictContent()->getProbabilityEntry(terminalId);
+    const ProbabilityEntry refreshedEntry = createUpdatedEntryFrom(&storedEntry, unigramProperty);
+    return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
+            &refreshedEntry);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+        const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode) {
+    if (!toBeUpdatedPtNodeParams->isTerminal()) {
+        AKLOGE("updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC is called for "
+                "non-terminal PtNode.");
+        return false;
+    }
+    const ProbabilityEntry originalProbabilityEntry =
+            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+                    toBeUpdatedPtNodeParams->getTerminalId());
+    if (!originalProbabilityEntry.hasHistoricalInfo()) {
+        // Without historical info there is no probability to update; the PtNode is kept.
+        *outNeedsToKeepPtNode = true;
+        return true;
+    }
+    const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+            originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
+    const ProbabilityEntry probabilityEntry =
+            originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
+    if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+            toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
+        AKLOGE("Cannot write updated probability entry. terminalId: %d",
+                toBeUpdatedPtNodeParams->getTerminalId());
+        return false;
+    }
+    // A PtNode that needsToKeep() rejects is flagged as willBecomeNonTerminal.
+    const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy);
+    if (!isValid && !markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
+        AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
+        return false;
+    }
+    *outNeedsToKeepPtNode = isValid;
+    return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
+        const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
+    int fieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();  // Local write cursor.
+    return DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
+            newChildrenPosition, &fieldPos);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const int newTerminalId) {
+    const int pos = toBeUpdatedPtNodeParams->getTerminalIdFieldPos();  // Field to overwrite.
+    return mTrieBuffer->writeUint(newTerminalId, Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, pos);
+}
+
+bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
+ const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
+ return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */,
+ ptNodeWritingPos);  // Null out-param: the terminal id is not reported back.
+}
+
+
+bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
+        const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
+        int *const ptNodeWritingPos) {
+    int newTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    const bool ptNodeWritten = writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams,
+            &newTerminalId, ptNodeWritingPos);
+    if (!ptNodeWritten) {
+        return false;
+    }
+    // Derive the entry from a default-constructed one and store it under the terminal id.
+    ProbabilityEntry defaultEntry;
+    const ProbabilityEntry entryToStore = createUpdatedEntryFrom(&defaultEntry, unigramProperty);
+    return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(newTerminalId,
+            &entryToStore);
+}
+
+bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+        const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
+    if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) {
+        AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
+                prevWordIds[0], wordId);  // Log the ids that were handed to addNewEntry().
+        return false;
+    }
+    const int ptNodePos =
+            mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(prevWordIds[0]);
+    const PtNodeParams sourcePtNodeParams =
+            mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+    if (!sourcePtNodeParams.hasBigrams()) {
+        // The source PtNode is not flagged yet: rewrite its flags with hasBigrams set.
+        return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(),
+                sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(),
+                sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(),
+                true /* hasBigrams */,
+                sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */);
+    }
+    return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
+ const int wordId) {
+ return mBigramPolicy->removeEntry(prevWordIds[0], wordId);  // Only prevWordIds[0] is used.
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
+ const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
+ return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(
+ sourcePtNodeParams->getTerminalId(), outBigramEntryCount);  // Delegates by terminal id.
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
+        const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const DictPositionRelocationMap *const dictPositionRelocationMap,
+        int *const outBigramEntryCount) {
+    // Translate the parent position through the PtNode relocation map when it has an entry.
+    int newParentPos = toBeUpdatedPtNodeParams->getParentPos();
+    if (newParentPos != NOT_A_DICT_POS) {
+        PtNodeWriter::PtNodePositionRelocationMap::const_iterator parentEntry =
+                dictPositionRelocationMap->mPtNodePositionRelocationMap.find(newParentPos);
+        if (parentEntry != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
+            newParentPos = parentEntry->second;
+        }
+    }
+    // The parent offset field is located right after the flags field.
+    int parentOffsetFieldPos = toBeUpdatedPtNodeParams->getHeadPos()
+            + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
+    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
+            newParentPos, toBeUpdatedPtNodeParams->getHeadPos(), &parentOffsetFieldPos)) {
+        return false;
+    }
+
+    // Translate the children position through the PtNode array relocation map likewise.
+    int newChildrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
+    if (newChildrenPos != NOT_A_DICT_POS) {
+        PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator arrayEntry =
+                dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(newChildrenPos);
+        if (arrayEntry != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
+            newChildrenPos = arrayEntry->second;
+        }
+    }
+    if (!updateChildrenPosition(toBeUpdatedPtNodeParams, newChildrenPos)) {
+        return false;
+    }
+    // The bigram entry count is reported only when the caller passes an out-param.
+    if (outBigramEntryCount) {
+        *outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
+                toBeUpdatedPtNodeParams->getTerminalId());
+    }
+    return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptNodeParams,
+        const int *const targetCodePoints, const int targetCodePointCount,
+        const int shortcutProbability) {
+    if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
+            targetCodePoints, targetCodePointCount, shortcutProbability)) {
+        AKLOGE("Cannot add new shortcut entry. terminalId: %d", ptNodeParams->getTerminalId());
+        return false;
+    }
+    if (!ptNodeParams->hasShortcutTargets()) {
+        // The PtNode is not flagged yet: rewrite its flags with hasShortcutTargets set.
+        return updatePtNodeFlags(ptNodeParams->getHeadPos(),
+                ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
+                ptNodeParams->isTerminal(), true /* hasShortcutTargets */,
+                ptNodeParams->hasBigrams(),
+                ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+    }
+    return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags(
+ const PtNodeParams *const ptNodeParams) {
+ const bool hasBigrams = mBuffers->getBigramDictContent()->getBigramListHeadPos(
+ ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;  // A bigram list head position is set.
+ const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
+ ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;  // A shortcut list head position is set.
+ return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(),
+ ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets,
+ hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);  // Rest as read.
+}
+
+bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
+        const PtNodeParams *const ptNodeParams, int *const outTerminalId,
+        int *const ptNodeWritingPos) {
+    const int nodePos = *ptNodeWritingPos;
+    // Write dummy flags first. They are replaced with the real flags by the updatePtNodeFlags()
+    // call at the end, once it is known whether the PtNode is written as a terminal.
+    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
+            0 /* nodeFlags */, ptNodeWritingPos)) {
+        return false;
+    }
+    // Write the parent position as an offset relative to nodePos.
+    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
+        return false;
+    }
+    // Write code points.
+    if (!DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
+        return false;
+    }
+    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    if (!ptNodeParams->willBecomeNonTerminal()) {
+        if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
+            terminalId = ptNodeParams->getTerminalId();
+        } else if (ptNodeParams->isTerminal()) {
+            // The params describe a terminal that has no id yet.
+            // Get a new unused terminal id.
+            terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
+        }
+    }
+    const bool isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
+    if (isTerminal) {
+        // Point the lookup table entry of this terminal id at the new PtNode.
+        if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+                terminalId, nodePos)) {
+            return false;
+        }
+        // Write terminal Id.
+        if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
+                Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
+            return false;
+        }
+        if (outTerminalId) {
+            *outTerminalId = terminalId;
+        }
+    }
+    // Write children position.
+    if (!DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
+        return false;
+    }
+    return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
+            isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
+            ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+}
+
+const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
+        const ProbabilityEntry *const originalProbabilityEntry,
+        const UnigramProperty *const unigramProperty) const {
+    // TODO: Consolidate historical info and probability.
+    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
+        // Without historical info only the probability value is replaced.
+        return originalProbabilityEntry->createEntryWithUpdatedProbability(
+                unigramProperty->getProbability());
+    }
+    // Build a HistoricalInfo from the unigram property and pass it, together with the stored
+    // one, to ForgettingCurveUtils::createUpdatedHistoricalInfo().
+    const HistoricalInfo incomingInfo(unigramProperty->getTimestamp(),
+            unigramProperty->getLevel(), unigramProperty->getCount());
+    const HistoricalInfo updatedInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
+            originalProbabilityEntry->getHistoricalInfo(), unigramProperty->getProbability(),
+            &incomingInfo, mHeaderPolicy);
+    return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(&updatedInfo);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
+        const bool isBlacklisted, const bool isNotAWord, const bool isTerminal,
+        const bool hasShortcutTargets, const bool hasBigrams, const bool hasMultipleChars) {
+    // Build the node flags from the boolean attributes, then store them at ptNodePos.
+    PatriciaTrieReadingUtils::NodeFlags flagsToWrite =
+            PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
+                    hasShortcutTargets, hasBigrams, hasMultipleChars,
+                    CHILDREN_POSITION_FIELD_SIZE);
+    const bool written = DynamicPtWritingUtils::writeFlags(mTrieBuffer, flagsToWrite, ptNodePos);
+    if (!written) {
+        AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", flagsToWrite, ptNodePos);
+    }
+    return written;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
new file mode 100644
index 0000000..f10b7b0
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "third_party/android_prediction/utils/int_array_view.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+} // namespace v402
+} // namespace backward
+class BufferWithExtendableBuffer;
+namespace backward {
+namespace v402 {
+} // namespace v402
+} // namespace backward
+class HeaderPolicy;
+namespace backward {
+namespace v402 {
+class Ver4BigramListPolicy;
+class Ver4DictBuffers;
+class Ver4PatriciaTrieNodeReader;
+class Ver4PtNodeArrayReader;
+class Ver4ShortcutListPolicy;
+
+/*
+ * This class helps to write and update PtNodes of a ver4 patricia trie.
+ */
+class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
+ public:
+ Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
+ Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
+ const PtNodeReader *const ptNodeReader,
+ const PtNodeArrayReader *const ptNodeArrayReader,
+ Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
+ : mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
+ mPtNodeReader(ptNodeReader), mReadingHelper(ptNodeReader, ptNodeArrayReader),
+ mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}  // Stores the pointers.
+
+ virtual ~Ver4PatriciaTrieNodeWriter() {}
+
+ virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
+
+ virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int movedPos, const int bigramLinkedNodePos);  // movedPos goes to the parent field.
+
+ virtual bool markPtNodeAsWillBecomeNonTerminal(
+ const PtNodeParams *const toBeUpdatedPtNodeParams);
+
+ virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const UnigramProperty *const unigramProperty);  // Returns false for a non-terminal PtNode.
+
+ virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+ const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
+
+ virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newChildrenPosition);
+
+ bool updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newTerminalId);  // Overwrites the terminal id field of the PtNode.
+
+ virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ int *const ptNodeWritingPos);
+
+ virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
+
+ virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
+
+ virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
+
+ virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
+ const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
+
+ virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const DictPositionRelocationMap *const dictPositionRelocationMap,
+ int *const outBigramEntryCount);  // outBigramEntryCount may be null.
+
+ virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams,
+ const int *const targetCodePoints, const int targetCodePointCount,
+ const int shortcutProbability);
+
+ bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
+
+ bool writePtNodeAndGetTerminalIdAndAdvancePosition(
+ const PtNodeParams *const ptNodeParams, int *const outTerminalId,
+ int *const ptNodeWritingPos);  // outTerminalId may be null.
+
+ // Create updated probability entry using given unigram property. In addition to the
+ // probability, this method updates historical information if needed.
+ // TODO: Update flags belonging to the unigram property.
+ const ProbabilityEntry createUpdatedEntryFrom(
+ const ProbabilityEntry *const originalProbabilityEntry,
+ const UnigramProperty *const unigramProperty) const;
+
+ bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
+ const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams,
+ const bool hasMultipleChars);  // Builds the flags and writes them at ptNodePos.
+
+ static const int CHILDREN_POSITION_FIELD_SIZE;  // Forwarded to createAndGetFlags().
+
+ BufferWithExtendableBuffer *const mTrieBuffer;
+ Ver4DictBuffers *const mBuffers;
+ const HeaderPolicy *const mHeaderPolicy;
+ const PtNodeReader *const mPtNodeReader;
+ DynamicPtReadingHelper mReadingHelper;  // Iterates child PtNodes in markPtNodeAsMoved().
+ Ver4BigramListPolicy *const mBigramPolicy;
+ Ver4ShortcutListPolicy *const mShortcutPolicy;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
new file mode 100644
index 0000000..e654c58
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -0,0 +1,541 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
+ * Do not edit this file other than updating policy's interface.
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h"
+
+#include <vector>
+
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_vector.h"
+#include "third_party/android_prediction/suggest/core/dictionary/ngram_listener.h"
+#include "third_party/android_prediction/suggest/core/dictionary/property/bigram_property.h"
+#include "third_party/android_prediction/suggest/core/dictionary/property/unigram_property.h"
+#include "third_party/android_prediction/suggest/core/dictionary/property/word_property.h"
+#include "third_party/android_prediction/suggest/core/session/prev_words_info.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/probability_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Query keys compared against the |query| argument of getProperty().
+// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
+// BinaryDictionaryDecayingTests.
+const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
+// The add/remove entry methods refuse to run once the trie buffer's tail position reaches
+// MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS, i.e. the maximum dictionary size minus this
+// margin.
+const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
+const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
+        Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+
+// Walks the PtNode array holding the children of |dicNode| and pushes one child DicNode into
+// |childDicNodes| for every PtNode that does not represent non-word information. Marks the
+// dictionary as corrupted when the reading helper reports an error.
+void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
+        DicNodeVector *const childDicNodes) const {
+    if (!dicNode->hasChildren()) {
+        return;
+    }
+    DynamicPtReadingHelper siblingReader(&mNodeReader, &mPtNodeArrayReader);
+    siblingReader.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
+    while (!siblingReader.isEnd()) {
+        const PtNodeParams nodeParams = siblingReader.getPtNodeParams();
+        if (!nodeParams.isValid()) {
+            break;
+        }
+        // A DecayingDict may have a terminal PtNode whose probability is NOT_A_PROBABILITY.
+        // Such a PtNode must not be treated as a valid terminal DicNode.
+        const bool isValidTerminal = nodeParams.isTerminal() && !nodeParams.isDeleted()
+                && (!mHeaderPolicy->isDecayingDict()
+                        || nodeParams.getProbability() != NOT_A_PROBABILITY);
+        // Advance to the next sibling before deciding whether this PtNode is skipped.
+        siblingReader.readNextSiblingNode(nodeParams);
+        if (!nodeParams.representsNonWordInfo()) {
+            const bool isBlacklistedOrNotAWord =
+                    nodeParams.isBlacklisted() || nodeParams.isNotAWord();
+            childDicNodes->pushLeavingChild(dicNode, nodeParams.getHeadPos(),
+                    nodeParams.getChildrenPos(), nodeParams.getProbability(), isValidTerminal,
+                    nodeParams.hasChildren(), isBlacklistedOrNotAWord,
+                    nodeParams.getCodePointCount(), nodeParams.getCodePoints());
+        }
+    }
+    if (siblingReader.isError()) {
+        mIsCorrupted = true;
+        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
+    }
+}
+
+// Delegates to a DynamicPtReadingHelper positioned at |ptNodePos| to fill |outCodePoints| and
+// |outUnigramProbability|, and returns the code point count the helper reports. Marks the
+// dictionary as corrupted when the helper reports a reading error.
+int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
+        const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+        int *const outUnigramProbability) const {
+    DynamicPtReadingHelper helper(&mNodeReader, &mPtNodeArrayReader);
+    helper.initWithPtNodePos(ptNodePos);
+    const int readCount = helper.getCodePointsAndProbabilityAndReturnCodePointCount(
+            maxCodePointCount, outCodePoints, outUnigramProbability);
+    if (!helper.isError()) {
+        return readCount;
+    }
+    mIsCorrupted = true;
+    AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
+    return readCount;
+}
+
+// Searches the trie from the root PtNode array for |inWord| (|length| code points) and returns
+// the position reported by the reading helper. Marks the dictionary as corrupted when the
+// helper reports a reading error.
+int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
+        const int length, const bool forceLowerCaseSearch) const {
+    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
+    readingHelper.initWithPtNodeArrayPos(getRootPosition());
+    const int ptNodePos =
+            readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
+    if (readingHelper.isError()) {
+        mIsCorrupted = true;
+        // Name this function in the log; the message previously pointed at
+        // createAndGetAllChildDicNodes(), which misdirected debugging.
+        AKLOGE("Dictionary reading error in getTerminalPtNodePositionOfWord().");
+    }
+    return ptNodePos;
+}
+
+// Combines a unigram and a bigram probability into a single probability value.
+// For a decaying dictionary both inputs are encoded and ForgettingCurveUtils decodes them.
+// Otherwise: no unigram probability yields NOT_A_PROBABILITY, no bigram probability yields the
+// backed-off unigram probability, and in the remaining case the bigram probability wins.
+int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
+        const int bigramProbability) const {
+    if (mHeaderPolicy->isDecayingDict()) {
+        // Both probabilities are encoded. Decode them and get probability.
+        return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
+    }
+    if (unigramProbability == NOT_A_PROBABILITY) {
+        return NOT_A_PROBABILITY;
+    }
+    if (bigramProbability == NOT_A_PROBABILITY) {
+        return ProbabilityUtils::backoff(unigramProbability);
+    }
+    return bigramProbability;
+}
+
+// Returns the probability of the PtNode at |ptNodePos|, or NOT_A_PROBABILITY when the position
+// is invalid or the PtNode is deleted, blacklisted or not a word.
+// When |prevWordsPtNodePos| is non-null, only its first element is consulted: the bigram list
+// of that PtNode is scanned for an entry targeting |ptNodePos|, and NOT_A_PROBABILITY is
+// returned if no such entry with a valid probability exists.
+int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
+        const int ptNodePos) const {
+    if (ptNodePos == NOT_A_DICT_POS) {
+        return NOT_A_PROBABILITY;
+    }
+    const PtNodeParams targetParams(
+            mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+    if (targetParams.isDeleted() || targetParams.isBlacklisted() || targetParams.isNotAWord()) {
+        return NOT_A_PROBABILITY;
+    }
+    if (!prevWordsPtNodePos) {
+        // No context: unigram-only probability.
+        return getProbability(targetParams.getProbability(), NOT_A_PROBABILITY);
+    }
+    const int bigramListPos = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+    for (BinaryDictionaryBigramsIterator it(&mBigramPolicy, bigramListPos); it.hasNext(); ) {
+        it.next();
+        if (it.getBigramPos() != ptNodePos) {
+            continue;
+        }
+        if (it.getProbability() == NOT_A_PROBABILITY) {
+            continue;
+        }
+        return getProbability(targetParams.getProbability(), it.getProbability());
+    }
+    return NOT_A_PROBABILITY;
+}
+
+// Reports every entry in the bigram list of the first previous word to |listener|.
+// Does nothing when |prevWordsPtNodePos| is null.
+void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
+        NgramListener *const listener) const {
+    if (prevWordsPtNodePos) {
+        const int bigramListPos = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+        for (BinaryDictionaryBigramsIterator it(&mBigramPolicy, bigramListPos);
+                it.hasNext(); ) {
+            it.next();
+            listener->onVisitEntry(it.getProbability(), it.getBigramPos());
+        }
+    }
+}
+
+// Returns the head position of the shortcut list belonging to the PtNode at |ptNodePos|, looked
+// up by the PtNode's terminal id. Returns NOT_A_DICT_POS for an invalid position or a deleted
+// PtNode.
+int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+    if (ptNodePos == NOT_A_DICT_POS) {
+        return NOT_A_DICT_POS;
+    }
+    const PtNodeParams nodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+    return nodeParams.isDeleted()
+            ? NOT_A_DICT_POS
+            : mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
+                    nodeParams.getTerminalId());
+}
+
+// Returns the head position of the bigram list belonging to the PtNode at |ptNodePos|, looked
+// up by the PtNode's terminal id. Returns NOT_A_DICT_POS for an invalid position or a deleted
+// PtNode.
+int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+    if (ptNodePos == NOT_A_DICT_POS) {
+        return NOT_A_DICT_POS;
+    }
+    const PtNodeParams nodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+    return nodeParams.isDeleted()
+            ? NOT_A_DICT_POS
+            : mBuffers->getBigramDictContent()->getBigramListHeadPos(
+                    nodeParams.getTerminalId());
+}
+
+// Adds |word| (|length| code points) as a unigram described by |unigramProperty|, followed by
+// every shortcut target the property carries.
+// Returns false when the dictionary is not updatable, when the trie buffer is too close to its
+// maximum size, when the word or one of the shortcut targets is longer than MAX_WORD_LENGTH,
+// when there are no code points to add, or when the updating helper fails.
+bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
+        const UnigramProperty *const unigramProperty) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (length > MAX_WORD_LENGTH) {
+        AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
+        return false;
+    }
+    for (const auto &shortcut : unigramProperty->getShortcuts()) {
+        if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
+            // "%d" expects an int; size() is presumably an unsigned size type, so convert
+            // explicitly instead of passing a mismatched variadic argument.
+            AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
+                    static_cast<int>(shortcut.getTargetCodePoints()->size()));
+            return false;
+        }
+    }
+    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
+    readingHelper.initWithPtNodeArrayPos(getRootPosition());
+    bool addedNewUnigram = false;
+    // Work on a local copy because the beginning-of-sentence marker may be attached to it.
+    int codePointsToAdd[MAX_WORD_LENGTH];
+    int codePointCountToAdd = length;
+    memmove(codePointsToAdd, word, sizeof(int) * length);
+    if (unigramProperty->representsBeginningOfSentence()) {
+        codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
+                codePointCountToAdd, MAX_WORD_LENGTH);
+    }
+    if (codePointCountToAdd <= 0) {
+        return false;
+    }
+    if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
+            unigramProperty, &addedNewUnigram)) {
+        // Beginning-of-sentence entries are not counted as unigrams.
+        if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
+            mUnigramCount++;
+        }
+        if (unigramProperty->getShortcuts().size() > 0) {
+            // Add shortcut target.
+            const int wordPos = getTerminalPtNodePositionOfWord(word, length,
+                    false /* forceLowerCaseSearch */);
+            if (wordPos == NOT_A_DICT_POS) {
+                AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
+                return false;
+            }
+            for (const auto &shortcut : unigramProperty->getShortcuts()) {
+                if (!mUpdatingHelper.addShortcutTarget(wordPos,
+                        shortcut.getTargetCodePoints()->data(),
+                        shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
+                    // Same "%d" / size() mismatch as above: convert the length to int.
+                    AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
+                            "probability: %d", wordPos,
+                            static_cast<int>(shortcut.getTargetCodePoints()->size()),
+                            shortcut.getProbability());
+                    return false;
+                }
+            }
+        }
+        return true;
+    } else {
+        return false;
+    }
+}
+
+// Adds the n-gram entry described by |bigramProperty| after the previous word(s) given by
+// |prevWordsInfo|. Only the first previous word is checked here (see the TODO below).
+// If that previous word is missing from the dictionary but is a beginning-of-sentence marker,
+// a unigram entry for the marker is added first.
+// Returns false when the dictionary is not updatable or too large, when |prevWordsInfo| is
+// invalid, when the target word is longer than MAX_WORD_LENGTH, when either word cannot be
+// found, or when the updating helper fails. Increments the bigram count for a new entry.
+bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+        const BigramProperty *const bigramProperty) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (!prevWordsInfo->isValid()) {
+        AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
+        return false;
+    }
+    if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
+        // "%d" expects an int; size() is presumably an unsigned size type, so convert
+        // explicitly instead of passing a mismatched variadic argument.
+        AKLOGE("The word is too long to insert the ngram to the dictionary. "
+                "length: %d", static_cast<int>(bigramProperty->getTargetCodePoints()->size()));
+        return false;
+    }
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+            false /* tryLowerCaseSearch */);
+    // TODO: Support N-gram.
+    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
+        if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
+            // The beginning-of-sentence marker is not in the dictionary yet; add it as a
+            // not-a-word unigram with no shortcuts so that the n-gram can hang off it.
+            const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
+            const UnigramProperty beginningOfSentenceUnigramProperty(
+                    true /* representsBeginningOfSentence */, true /* isNotAWord */,
+                    false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
+                    NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+            if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
+                    prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
+                    &beginningOfSentenceUnigramProperty)) {
+                AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
+                return false;
+            }
+            // Refresh Terminal PtNode positions.
+            prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+                    false /* tryLowerCaseSearch */);
+        } else {
+            return false;
+        }
+    }
+    const int word1Pos = getTerminalPtNodePositionOfWord(
+            bigramProperty->getTargetCodePoints()->data(),
+            bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
+    if (word1Pos == NOT_A_DICT_POS) {
+        return false;
+    }
+    bool addedNewBigram = false;
+    if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(prevWordsPtNodePos),
+            word1Pos, bigramProperty, &addedNewBigram)) {
+        if (addedNewBigram) {
+            mBigramCount++;
+        }
+        return true;
+    } else {
+        return false;
+    }
+}
+
+// Removes the n-gram entry that links the previous word(s) in |prevWordsInfo| to |word|
+// (|length| code points). Only the first previous word is checked here (see the TODO below).
+// Returns false when the dictionary is not updatable or too large, when |prevWordsInfo| is
+// invalid, when |word| is longer than MAX_WORD_LENGTH, when either word cannot be found, or
+// when the updating helper fails. Decrements the bigram count on success.
+bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+        const int *const word, const int length) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (!prevWordsInfo->isValid()) {
+        AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
+        return false;
+    }
+    if (length > MAX_WORD_LENGTH) {
+        AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
+        // Reject the request like the add paths do; previously execution fell through and the
+        // oversized word was still handed to the trie lookup.
+        return false;
+    }
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+            false /* tryLowerCaseSearch */);
+    // TODO: Support N-gram.
+    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
+        return false;
+    }
+    const int wordPos = getTerminalPtNodePositionOfWord(word, length,
+            false /* forceLowerCaseSearch */);
+    if (wordPos == NOT_A_DICT_POS) {
+        return false;
+    }
+    if (mUpdatingHelper.removeNgramEntry(
+            PtNodePosArrayView::fromObject(prevWordsPtNodePos), wordPos)) {
+        mBigramCount--;
+        return true;
+    } else {
+        return false;
+    }
+}
+
+// Writes the dictionary to |filePath| together with the current unigram and bigram counts.
+// Returns false for a non-updatable dictionary. A failed write additionally marks the
+// dictionary as corrupted.
+bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
+        return false;
+    }
+    const bool written = mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount);
+    if (!written) {
+        AKLOGE("Cannot flush the dictionary to file.");
+        mIsCorrupted = true;
+    }
+    return written;
+}
+
+// Writes the dictionary to |filePath| through the writing helper's GC path, starting from the
+// root position. Returns false for a non-updatable dictionary. A failed write additionally
+// marks the dictionary as corrupted.
+bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+        return false;
+    }
+    const bool written = mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath);
+    if (!written) {
+        AKLOGE("Cannot flush the dictionary to file with GC.");
+        mIsCorrupted = true;
+    }
+    return written;
+}
+
+// Tells whether a GC pass should be run on this dictionary. Always false for a non-updatable
+// dictionary. Otherwise true when any size limit below is hit; for a decaying dictionary the
+// final decision is delegated to ForgettingCurveUtils::needsToDecay().
+bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+        return false;
+    }
+    // Additional buffer size is near the limit.
+    if (mBuffers->isNearSizeLimit()) {
+        return true;
+    }
+    // Total extended region size of the trie exceeds the limit.
+    if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
+            > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
+        return true;
+    }
+    // Needs to reduce dictionary size.
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
+            && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
+        return true;
+    }
+    if (!mHeaderPolicy->isDecayingDict()) {
+        return false;
+    }
+    return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
+            mHeaderPolicy);
+}
+
+// Answers a property query by printing the matching integer into |outResult| (at most
+// |maxResultLength| bytes, via snprintf). Recognized queries are the *_QUERY constants of this
+// class; |outResult| is left untouched for any other query.
+void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
+        char *const outResult, const int maxResultLength) {
+    const int compareLength = queryLength + 1 /* terminator */;
+    int value;
+    if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
+        value = mUnigramCount;
+    } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
+        value = mBigramCount;
+    } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
+        // Decaying dictionaries take the limit from the forgetting curve settings; other
+        // dictionaries report the maximum dictionary size.
+        value = mHeaderPolicy->isDecayingDict() ?
+                ForgettingCurveUtils::getUnigramCountHardLimit(
+                        mHeaderPolicy->getMaxUnigramCount()) :
+                static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE);
+    } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
+        value = mHeaderPolicy->isDecayingDict() ?
+                ForgettingCurveUtils::getBigramCountHardLimit(
+                        mHeaderPolicy->getMaxBigramCount()) :
+                static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE);
+    } else {
+        // Unknown query: nothing is written.
+        return;
+    }
+    snprintf(outResult, maxResultLength, "%d", value);
+}
+
+// Builds a WordProperty for the word given by |codePoints| / |codePointCount|: its code points,
+// its unigram attributes, and the bigram and shortcut entries attached to its terminal PtNode.
+// Returns a default-constructed WordProperty (after logging) when the word is not found.
+const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
+        const int codePointCount) const {
+    const int terminalPos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
+            false /* forceLowerCaseSearch */);
+    if (terminalPos == NOT_A_DICT_POS) {
+        AKLOGE("getWordProperty is called for invalid word.");
+        return WordProperty();
+    }
+    const PtNodeParams wordParams =
+            mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(terminalPos);
+    std::vector<int> wordCodePoints(wordParams.getCodePoints(),
+            wordParams.getCodePoints() + wordParams.getCodePointCount());
+    const ProbabilityEntry unigramEntry =
+            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+                    wordParams.getTerminalId());
+    const HistoricalInfo *const unigramHistory = unigramEntry.getHistoricalInfo();
+
+    // Fetch bigram information.
+    std::vector<BigramProperty> bigrams;
+    const int bigramListPos = getBigramsPositionOfPtNode(terminalPos);
+    if (bigramListPos != NOT_A_DICT_POS) {
+        int targetCodePoints[MAX_WORD_LENGTH];
+        const BigramDictContent *const bigramContent = mBuffers->getBigramDictContent();
+        const TerminalPositionLookupTable *const lookupTable =
+                mBuffers->getTerminalPositionLookupTable();
+        int readingPos = bigramListPos;
+        bool moreEntries = true;
+        do {
+            const BigramEntry entry =
+                    bigramContent->getBigramEntryAndAdvancePosition(&readingPos);
+            moreEntries = entry.hasNext();
+            const int targetTerminalId = entry.getTargetTerminalId();
+            const int targetPtNodePos = lookupTable->getTerminalPtNodePosition(targetTerminalId);
+            if (targetPtNodePos == NOT_A_DICT_POS) {
+                // The target has no PtNode position; skip this entry.
+                continue;
+            }
+            // Word (unigram) probability
+            int targetUnigramProbability = NOT_A_PROBABILITY;
+            const int targetLength = getCodePointsAndProbabilityAndReturnCodePointCount(
+                    targetPtNodePos, MAX_WORD_LENGTH, targetCodePoints,
+                    &targetUnigramProbability);
+            const std::vector<int> target(targetCodePoints, targetCodePoints + targetLength);
+            const HistoricalInfo *const bigramHistory = entry.getHistoricalInfo();
+            // Entries with historical info store an encoded probability that must be decoded.
+            const int bigramProbability = entry.hasHistoricalInfo() ?
+                    ForgettingCurveUtils::decodeProbability(
+                            entry.getHistoricalInfo(), mHeaderPolicy) :
+                    entry.getProbability();
+            bigrams.emplace_back(&target, bigramProbability,
+                    bigramHistory->getTimeStamp(), bigramHistory->getLevel(),
+                    bigramHistory->getCount());
+        } while (moreEntries);
+    }
+
+    // Fetch shortcut information.
+    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
+    int shortcutPos = getShortcutPositionOfPtNode(terminalPos);
+    if (shortcutPos != NOT_A_DICT_POS) {
+        int shortcutCodePoints[MAX_WORD_LENGTH];
+        const ShortcutDictContent *const shortcutContent = mBuffers->getShortcutDictContent();
+        bool moreShortcuts = true;
+        do {
+            int shortcutLength = 0;
+            int shortcutProbability = NOT_A_PROBABILITY;
+            shortcutContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH,
+                    shortcutCodePoints, &shortcutLength, &shortcutProbability, &moreShortcuts,
+                    &shortcutPos);
+            const std::vector<int> target(shortcutCodePoints,
+                    shortcutCodePoints + shortcutLength);
+            shortcuts.emplace_back(&target, shortcutProbability);
+        } while (moreShortcuts);
+    }
+
+    const UnigramProperty unigramProperty(wordParams.representsBeginningOfSentence(),
+            wordParams.isNotAWord(), wordParams.isBlacklisted(), wordParams.getProbability(),
+            unigramHistory->getTimeStamp(), unigramHistory->getLevel(),
+            unigramHistory->getCount(), &shortcuts);
+    return WordProperty(&wordCodePoints, &unigramProperty, &bigrams);
+}
+
+// Iterates over the words of the dictionary. |token| is an index into a cached list of terminal
+// PtNode positions; the list is rebuilt by a full trie traversal whenever |token| is 0.
+// Fills |outCodePoints| / |outCodePointCount| with the word for |token| and returns the token
+// for the next word, or 0 when |token| is out of range or the last word has been returned.
+int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+        int *const outCodePointCount) {
+    *outCodePointCount = 0;
+    if (token == 0) {
+        // Start of an iteration: collect every terminal PtNode position.
+        mTerminalPtNodePositionsForIteratingWords.clear();
+        DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions collector(
+                &mTerminalPtNodePositionsForIteratingWords);
+        DynamicPtReadingHelper traversalHelper(&mNodeReader, &mPtNodeArrayReader);
+        traversalHelper.initWithPtNodeArrayPos(getRootPosition());
+        traversalHelper.traverseAllPtNodesInPostorderDepthFirstManner(&collector);
+    }
+    const int wordCount = static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
+    if (token < 0 || token >= wordCount) {
+        AKLOGE("Given token %d is invalid.", token);
+        return 0;
+    }
+    int unigramProbability = NOT_A_PROBABILITY;
+    *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+            mTerminalPtNodePositionsForIteratingWords[token], MAX_WORD_LENGTH, outCodePoints,
+            &unigramProbability);
+    const int nextToken = token + 1;
+    if (nextToken < wordCount) {
+        return nextToken;
+    }
+    // All words have been iterated.
+    mTerminalPtNodePositionsForIteratingWords.clear();
+    return 0;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
new file mode 100644
index 0000000..a5c082d
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
+ * Do not edit this file other than updating policy's interface.
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+} // namespace v402
+} // namespace backward
+class DicNode;
+namespace backward {
+namespace v402 {
+} // namespace v402
+} // namespace backward
+class DicNodeVector;
+namespace backward {
+namespace v402 {
+
+// Dictionary structure policy for the backward-compatible (v402) version 4 patricia trie
+// format. Owns the dictionary buffers and the reader/writer helpers that operate on them, and
+// implements lookup, dynamic update, flushing and word iteration on top of those helpers.
+class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
+ public:
+    // Takes ownership of |buffers|. Declared explicit so that a buffers pointer is never
+    // converted into a policy implicitly.
+    // The initializer list depends on the member declaration order below: mBuffers must be
+    // initialized first because almost every other member is derived from it.
+    explicit Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
+            : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
+              mDictBuffer(mBuffers->getWritableTrieBuffer()),
+              mBigramPolicy(mBuffers->getMutableBigramDictContent(),
+                      mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
+              mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
+                      mBuffers->getTerminalPositionLookupTable()),
+              mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy),
+              mPtNodeArrayReader(mDictBuffer),
+              mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
+                      &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
+              mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
+              mWritingHelper(mBuffers.get()),
+              mUnigramCount(mHeaderPolicy->getUnigramCount()),
+              mBigramCount(mHeaderPolicy->getBigramCount()),
+              mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
+
+    // The root PtNode array is always at position 0.
+    AK_FORCE_INLINE int getRootPosition() const {
+        return 0;
+    }
+
+    // Pushes the children of |dicNode| into |childDicNodes|.
+    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
+            DicNodeVector *const childDicNodes) const;
+
+    // Fills |outCodePoints| and |outUnigramProbability| for the PtNode at |terminalPtNodePos|
+    // and returns the code point count.
+    int getCodePointsAndProbabilityAndReturnCodePointCount(
+            const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
+            int *const outUnigramProbability) const;
+
+    // Looks |inWord| up from the root and returns the position found by the reading helper.
+    int getTerminalPtNodePositionOfWord(const int *const inWord,
+            const int length, const bool forceLowerCaseSearch) const;
+
+    // Combines a unigram and a bigram probability into one probability value.
+    int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+    // Returns the probability of the PtNode at |ptNodePos|, optionally in the context of the
+    // previous word positions in |prevWordsPtNodePos| (may be null).
+    int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
+
+    // Reports the bigram entries of the first previous word to |listener|.
+    void iterateNgramEntries(const int *const prevWordsPtNodePos,
+            NgramListener *const listener) const;
+
+    // Returns the shortcut list head position for the PtNode, or NOT_A_DICT_POS.
+    int getShortcutPositionOfPtNode(const int ptNodePos) const;
+
+    const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+        return mHeaderPolicy;
+    }
+
+    const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+        return &mShortcutPolicy;
+    }
+
+    // Adds a unigram (and its shortcut targets). Returns false on any failure.
+    bool addUnigramEntry(const int *const word, const int length,
+            const UnigramProperty *const unigramProperty);
+
+    bool removeUnigramEntry(const int *const word, const int length) {
+        // Removing unigram entry is not supported.
+        return false;
+    }
+
+    // Adds an n-gram entry after the previous word(s). Returns false on any failure.
+    bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+            const BigramProperty *const bigramProperty);
+
+    // Removes an n-gram entry. Returns false on any failure.
+    bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1,
+            const int length1);
+
+    // Writes the dictionary to |filePath|; a failed write marks the dictionary as corrupted.
+    bool flush(const char *const filePath);
+
+    // Same as flush(), but through the writing helper's GC path.
+    bool flushWithGC(const char *const filePath);
+
+    // Tells whether a GC pass should be run on this dictionary.
+    bool needsToRunGC(const bool mindsBlockByGC) const;
+
+    // Prints the integer answer to one of the *_QUERY keys into |outResult|.
+    void getProperty(const char *const query, const int queryLength, char *const outResult,
+            const int maxResultLength);
+
+    // Builds the WordProperty (unigram, bigrams, shortcuts) of the given word.
+    const WordProperty getWordProperty(const int *const codePoints,
+            const int codePointCount) const;
+
+    // Word iteration: returns the token for the next word, or 0 when iteration is finished.
+    int getNextWordAndNextToken(const int token, int *const outCodePoints,
+            int *const outCodePointCount);
+
+    // True once a reading error or a failed flush has been observed.
+    bool isCorrupted() const {
+        return mIsCorrupted;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
+
+    static const char *const UNIGRAM_COUNT_QUERY;
+    static const char *const BIGRAM_COUNT_QUERY;
+    static const char *const MAX_UNIGRAM_COUNT_QUERY;
+    static const char *const MAX_BIGRAM_COUNT_QUERY;
+    // When the dictionary size is near the maximum size, we have to refuse dynamic operations to
+    // prevent the dictionary from overflowing.
+    static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+    static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+
+    // NOTE: do not reorder these members; the constructor's initializer list relies on this
+    // declaration order (members are initialized in declaration order).
+    const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
+    const HeaderPolicy *const mHeaderPolicy;
+    BufferWithExtendableBuffer *const mDictBuffer;
+    Ver4BigramListPolicy mBigramPolicy;
+    Ver4ShortcutListPolicy mShortcutPolicy;
+    Ver4PatriciaTrieNodeReader mNodeReader;
+    Ver4PtNodeArrayReader mPtNodeArrayReader;
+    Ver4PatriciaTrieNodeWriter mNodeWriter;
+    DynamicPtUpdatingHelper mUpdatingHelper;
+    Ver4PatriciaTrieWritingHelper mWritingHelper;
+    int mUnigramCount;
+    int mBigramCount;
+    // Terminal PtNode positions cached for getNextWordAndNextToken().
+    std::vector<int> mTerminalPtNodePositionsForIteratingWords;
+    // Mutable so that const reading methods can record a detected reading error.
+    mutable bool mIsCorrupted;
+
+    // Returns the bigram list head position for the PtNode, or NOT_A_DICT_POS.
+    int getBigramsPositionOfPtNode(const int ptNodePos) const;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif // LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp
new file mode 100644
index 0000000..c381ea5
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Reads the terminal id stored in |buffer| at |*pos| through
+// ByteArrayUtils::readUint32AndAdvancePosition(), which is given |pos| so that it can move the
+// position forward, and returns the value as an int.
+/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
+        const uint8_t *const buffer, int *pos) {
+    const int terminalId = ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
+    return terminalId;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h
new file mode 100644
index 0000000..9f87d2d
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+} // namespace v402
+} // namespace backward
+class BufferWithExtendableBuffer;
+namespace backward {
+namespace v402 {
+
+// Static-only reading helpers for PtNodes of dictionaries handled by the backward::v402 code.
+class Ver4PatriciaTrieReadingUtils {
+ public:
+    // Returns the terminal id read from buffer at *pos; pos is forwarded to the byte reader.
+    static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+ private:
+    // Only a static member is declared; the class carries no data.
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
new file mode 100644
index 0000000..08d1c09
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
+
+#include <cstring>
+#include <queue>
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Writes the header and the dictionary buffers held by mBuffers to dictDirPath without GC.
+// The header is filled in with the given unigram/bigram counts. Returns false when the header
+// cannot be written to its buffer; otherwise returns the result of flushing the buffers.
+bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
+        const int unigramCount, const int bigramCount) const {
+    const HeaderPolicy *const policy = mBuffers->getHeaderPolicy();
+    BufferWithExtendableBuffer bufferForHeader(
+            BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+    // The size stored in the header is the policy's extended region size plus what the trie
+    // buffer has used of its additional buffer.
+    const int extendedRegionSize = policy->getExtendedRegionSize()
+            + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
+    const bool isHeaderWritten = policy->fillInAndWriteHeaderToBuffer(
+            false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize,
+            &bufferForHeader);
+    if (isHeaderWritten) {
+        return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &bufferForHeader);
+    }
+    AKLOGE("Cannot write header structure to buffer. "
+            "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
+            "extendedRegionSize: %d", false, unigramCount, bigramCount,
+            extendedRegionSize);
+    return false;
+}
+
+// Runs GC from rootPtNodeArrayPos into newly created dictionary buffers and flushes those
+// buffers to dictDirPath. The header is written with updatesLastDecayedTime set to true, an
+// extended region size of 0 and the counts reported by runGC(). Returns false when GC or
+// writing the header fails; otherwise returns the result of flushing the buffers.
+bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
+        const char *const dictDirPath) {
+    const HeaderPolicy *const policy = mBuffers->getHeaderPolicy();
+    Ver4DictBuffers::Ver4DictBuffersPtr gcBuffers(Ver4DictBuffers::createVer4DictBuffers(
+            policy, Ver4DictConstants::MAX_DICTIONARY_SIZE));
+    int unigramCountAfterGc = 0;
+    int bigramCountAfterGc = 0;
+    const bool gcSucceeded = runGC(rootPtNodeArrayPos, policy, gcBuffers.get(),
+            &unigramCountAfterGc, &bigramCountAfterGc);
+    if (!gcSucceeded) {
+        return false;
+    }
+    BufferWithExtendableBuffer bufferForHeader(
+            BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+    const bool isHeaderWritten = policy->fillInAndWriteHeaderToBuffer(
+            true /* updatesLastDecayedTime */, unigramCountAfterGc, bigramCountAfterGc,
+            0 /* extendedRegionSize */, &bufferForHeader);
+    // The flush is attempted only when the header could be written.
+    return isHeaderWritten
+            && gcBuffers->flushHeaderAndDictBuffers(dictDirPath, &bufferForHeader);
+}
+
+// Garbage-collects the dictionary held by mBuffers into buffersToWrite.
+//
+// The work is done as a sequence of traversals that all start at rootPtNodeArrayPos:
+//   1. update unigram probabilities and mark useless PtNodes as deleted (on mBuffers),
+//      truncating unigrams when a decaying dictionary exceeds its maximum unigram count;
+//   2. update bigram probabilities (on mBuffers), truncating bigrams likewise;
+//   3. place and write the valid PtNodes into buffersToWrite, recording how positions moved;
+//   4. GC the terminal ids and the probability / bigram / shortcut contents of buffersToWrite;
+//   5. update position fields, then PtNode flags and terminal ids, in buffersToWrite.
+//
+// On success *outUnigramCount receives the count reported by the position-updating traversal
+// and *outBigramCount is filled in by the bigram content GC. Returns false as soon as any step
+// fails; the output counts are not guaranteed to be set in that case.
+bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
+        const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
+        int *const outUnigramCount, int *const outBigramCount) {
+    // Readers, list policies and a writer that all operate on the current buffers (mBuffers).
+    Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
+            mBuffers->getProbabilityDictContent(), headerPolicy);
+    Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
+    Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
+            mBuffers->getTerminalPositionLookupTable(), headerPolicy);
+    Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
+            mBuffers->getTerminalPositionLookupTable());
+    Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
+            mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
+            &shortcutPolicy);
+
+    // Step 1: unigram probabilities and useless PtNodes, visited in postorder.
+    DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
+    readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+    DynamicPtGcEventListeners
+            ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+                    traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+                            &ptNodeWriter);
+    if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+            &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
+        return false;
+    }
+    // This count is only used for the truncation check and the log message below; the count
+    // handed back to the caller is taken from a later traversal.
+    const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+            .getValidUnigramCount();
+    const int maxUnigramCount = headerPolicy->getMaxUnigramCount();
+    if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) {
+        if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) {
+            AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount,
+                    maxUnigramCount);
+            return false;
+        }
+    }
+
+    // Step 2: bigram probabilities, again in postorder from the root.
+    readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+    DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
+            traversePolicyToUpdateBigramProbability(&ptNodeWriter);
+    if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+            &traversePolicyToUpdateBigramProbability)) {
+        return false;
+    }
+    // As with unigramCount, this local count only drives the truncation check and logging.
+    const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount();
+    const int maxBigramCount = headerPolicy->getMaxBigramCount();
+    if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) {
+        if (!truncateBigrams(maxBigramCount)) {
+            AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount);
+            return false;
+        }
+    }
+
+    // Step 3: copy the valid PtNodes into buffersToWrite.
+    // Mapping from positions in mBuffers to positions in buffersToWrite.
+    PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
+    readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+    // This writer targets buffersToWrite but is still given the reader and list policies that
+    // were created over mBuffers above.
+    Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
+            buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
+            &shortcutPolicy);
+    DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+            traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
+                    buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
+    if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+            &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
+        return false;
+    }
+
+    // Create policy instances for the GCed dictionary.
+    Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
+            buffersToWrite->getProbabilityDictContent(), headerPolicy);
+    Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
+    Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
+            buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
+    Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
+            buffersToWrite->getTerminalPositionLookupTable());
+    Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
+            buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
+            &newShortcutPolicy);
+    // Step 4: the terminal id map filled in here is shared by all content GCs below and by the
+    // final traversal.
+    // Re-assign terminal IDs for valid terminal PtNodes.
+    TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
+    if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
+            &terminalIdMap)) {
+        return false;
+    }
+    // Run GC for probability dict content.
+    if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
+            mBuffers->getProbabilityDictContent())) {
+        return false;
+    }
+    // Run GC for bigram dict content.
+    // This call is also what fills in *outBigramCount.
+    if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
+            mBuffers->getBigramDictContent(), outBigramCount)) {
+        return false;
+    }
+    // Run GC for shortcut dict content.
+    if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
+            mBuffers->getShortcutDictContent())) {
+        return false;
+    }
+    // Step 5: fix up the new dictionary. The same root position is used for the new buffers.
+    DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader);
+    newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+    DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
+            traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
+    if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+            &traversePolicyToUpdateAllPositionFields)) {
+        return false;
+    }
+    newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+    TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+            traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap);
+    if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+            &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
+        return false;
+    }
+    // The unigram count reported to the caller comes from the position-updating traversal over
+    // the new buffers, not from the pre-truncation count computed in step 1.
+    *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
+    return true;
+}
+
+// Shrinks the set of unigrams to at most maxUnigramCount queued entries.
+//
+// Every terminal id below the lookup table's next terminal id that has a PtNode position is
+// queued with its probability (decoded from the historical info when the entry has it,
+// otherwise the stored probability) and its timestamp. Entries are then taken from the top of
+// a priority queue ordered by DictProbabilityComparator and their PtNodes are marked as
+// "will become non-terminal" through ptNodeWriter. PtNodes that represent non-word info are
+// removed from the queue without being marked. Returns false as soon as marking fails.
+bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
+        const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+        Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
+    typedef std::priority_queue<DictProbability, std::vector<DictProbability>,
+            DictProbabilityComparator> UnigramQueue;
+    const TerminalPositionLookupTable *const lookupTable =
+            mBuffers->getTerminalPositionLookupTable();
+    const int terminalIdLimit = lookupTable->getNextTerminalId();
+    UnigramQueue candidates;
+    for (int terminalId = 0; terminalId < terminalIdLimit; ++terminalId) {
+        const int terminalPtNodePos = lookupTable->getTerminalPtNodePosition(terminalId);
+        if (terminalPtNodePos != NOT_A_DICT_POS) {
+            const ProbabilityEntry entry =
+                    mBuffers->getProbabilityDictContent()->getProbabilityEntry(terminalId);
+            int probability;
+            if (entry.hasHistoricalInfo()) {
+                probability = ForgettingCurveUtils::decodeProbability(
+                        entry.getHistoricalInfo(), mBuffers->getHeaderPolicy());
+            } else {
+                probability = entry.getProbability();
+            }
+            // The timestamp is read from the historical info in both cases.
+            candidates.push(DictProbability(terminalPtNodePos, probability,
+                    entry.getHistoricalInfo()->getTimeStamp()));
+        }
+    }
+
+    // Delete unigrams.
+    while (static_cast<int>(candidates.size()) > maxUnigramCount) {
+        const int victimPos = candidates.top().getDictPos();
+        candidates.pop();
+        const PtNodeParams victimParams =
+                ptNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(victimPos);
+        // Non-word PtNodes are only dropped from the queue; everything else must be marked.
+        if (!victimParams.representsNonWordInfo()
+                && !ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&victimParams)) {
+            AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", victimPos);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Shrinks the set of bigram entries to at most maxBigramCount queued entries.
+//
+// For every terminal id below the lookup table's next terminal id that has a bigram list, the
+// list is walked entry by entry and each valid entry is queued with its position, its
+// probability (decoded from the historical info when the entry has it) and its timestamp.
+// Entries are then taken from the top of a priority queue ordered by
+// DictProbabilityComparator and overwritten in place with their invalidated form. Returns
+// false as soon as an entry cannot be written.
+bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
+    typedef std::priority_queue<DictProbability, std::vector<DictProbability>,
+            DictProbabilityComparator> BigramQueue;
+    const int terminalIdLimit = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
+    BigramQueue candidates;
+    BigramDictContent *const bigramContent = mBuffers->getMutableBigramDictContent();
+    for (int terminalId = 0; terminalId < terminalIdLimit; ++terminalId) {
+        int readingPos = bigramContent->getBigramListHeadPos(terminalId);
+        if (readingPos == NOT_A_DICT_POS) {
+            continue;
+        }
+        bool hasNext = true;
+        do {
+            // Remember where this entry starts; reading it moves readingPos to the next one.
+            const int entryPos = readingPos;
+            const BigramEntry entry =
+                    bigramContent->getBigramEntryAndAdvancePosition(&readingPos);
+            hasNext = entry.hasNext();
+            if (entry.isValid()) {
+                const int probability = entry.hasHistoricalInfo()
+                        ? ForgettingCurveUtils::decodeProbability(
+                                entry.getHistoricalInfo(), mBuffers->getHeaderPolicy())
+                        : entry.getProbability();
+                candidates.push(DictProbability(entryPos, probability,
+                        entry.getHistoricalInfo()->getTimeStamp()));
+            }
+        } while (hasNext);
+    }
+
+    // Delete bigrams.
+    while (static_cast<int>(candidates.size()) > maxBigramCount) {
+        const int entryPos = candidates.top().getDictPos();
+        const BigramEntry currentEntry = bigramContent->getBigramEntry(entryPos);
+        const BigramEntry invalidatedEntry = currentEntry.getInvalidatedEntry();
+        const bool isWritten = bigramContent->writeBigramEntry(&invalidatedEntry, entryPos);
+        if (!isWritten) {
+            AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos);
+            return false;
+        }
+        candidates.pop();
+    }
+    return true;
+}
+
+// Listener callback for the traversal that finishes GC (see runGC()).
+//
+// Non-terminal PtNodes are accepted untouched. For a terminal PtNode, its current terminal id
+// is looked up in mTerminalIdMap, the PtNode is rewritten with the mapped id, and its
+// has-bigrams / has-shortcut-targets flags are updated through mPtNodeWriter.
+//
+// Returns false when the terminal id has no entry in the map, when the new terminal id cannot
+// be written, or when the flag update fails; returns true otherwise.
+bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+        ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+    if (!ptNodeParams->isTerminal()) {
+        return true;
+    }
+    TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
+            mTerminalIdMap->find(ptNodeParams->getTerminalId());
+    if (it == mTerminalIdMap->end()) {
+        AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
+                ptNodeParams->getTerminalId(), mTerminalIdMap->size());
+        return false;
+    }
+    if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
+        AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
+        // Report the failure instead of carrying on: runGC() GCs the probability, bigram and
+        // shortcut contents with the same terminal id map, so a PtNode left with its old id
+        // would presumably no longer match them.
+        return false;
+    }
+    return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams);
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h
new file mode 100644
index 0000000..4c6c96c
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+} // namespace v402
+} // namespace backward
+class HeaderPolicy;
+namespace backward {
+namespace v402 {
+class Ver4DictBuffers;
+class Ver4PatriciaTrieNodeReader;
+class Ver4PatriciaTrieNodeWriter;
+
+// Writes the dictionary held by a Ver4DictBuffers instance to files, either as it is or after
+// running garbage collection on it.
+class Ver4PatriciaTrieWritingHelper {
+ public:
+    // Stores the given pointer; the buffers themselves are not copied.
+    Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers) : mBuffers(buffers) {}
+
+    // Writes the header (filled in with the given counts) and the dictionary buffers to
+    // dictDirPath. Returns whether writing succeeded.
+    bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
+            const int bigramCount) const;
+
+    // Runs GC starting at rootPtNodeArrayPos and writes the resulting dictionary to
+    // dictDirPath. Returns whether GC and writing succeeded.
+    // This method cannot be const because the original dictionary buffer will be updated to
+    // detect useless PtNodes during GC.
+    bool writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
+
+    // Traversal listener that rewrites the terminal id of every terminal PtNode according to
+    // terminalIdMap and updates the PtNode's bigram/shortcut flags through ptNodeWriter.
+    class TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+            : public DynamicPtReadingHelper::TraversingEventListener {
+     public:
+        TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(
+                Ver4PatriciaTrieNodeWriter *const ptNodeWriter,
+                const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap)
+                : mPtNodeWriter(ptNodeWriter), mTerminalIdMap(terminalIdMap) {}
+
+        // Only PtNode visits do any work; the other events always report success.
+        bool onAscend() { return true; }
+        bool onDescend(const int ptNodeArrayPos) { return true; }
+        bool onReadingPtNodeArrayTail() { return true; }
+        bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+     private:
+        DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds);
+
+        Ver4PatriciaTrieNodeWriter *const mPtNodeWriter;
+        const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap;
+    };
+
+    // Queue element for truncateUnigrams() and truncateBigrams(): a dictionary position
+    // together with the probability and timestamp used to rank it.
+    class DictProbability {
+     public:
+        DictProbability(const int dictPos, const int probability, const int timestamp)
+                : mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {}
+
+        int getDictPos() const { return mDictPos; }
+        int getProbability() const { return mProbability; }
+        int getTimestamp() const { return mTimestamp; }
+
+     private:
+        DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability);
+
+        int mDictPos;
+        int mProbability;
+        int mTimestamp;
+    };
+
+    // Ordering for the priority queues in truncateUnigrams() and truncateBigrams().
+    // left is ordered before right when it has the higher probability; on equal probability,
+    // when it has the smaller timestamp; on equal timestamp, when it has the larger position.
+    // With std::priority_queue the top element is therefore the one with the lowest
+    // probability (ties: larger timestamp first, then smaller position).
+    // NOTE(review): the tie-break takes the larger timestamp first - confirm this is intended.
+    class DictProbabilityComparator {
+     public:
+        bool operator()(const DictProbability &left, const DictProbability &right) {
+            if (left.getProbability() == right.getProbability()) {
+                if (left.getTimestamp() == right.getTimestamp()) {
+                    return left.getDictPos() > right.getDictPos();
+                }
+                return left.getTimestamp() < right.getTimestamp();
+            }
+            return left.getProbability() > right.getProbability();
+        }
+
+     private:
+        DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator);
+    };
+
+    // Performs the GC passes from rootPtNodeArrayPos, writing the collected dictionary into
+    // buffersToWrite and reporting the resulting counts through the out parameters.
+    bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
+            Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
+            int *const outBigramCount);
+
+    // Marks unigrams as "will become non-terminal" until at most maxUnigramCount are queued.
+    bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+            Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount);
+
+    // Invalidates bigram entries until at most maxBigramCount are queued.
+    bool truncateBigrams(const int maxBigramCount);
+
+    // Buffers of the dictionary being written; set once in the constructor.
+    Ver4DictBuffers *const mBuffers;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp
new file mode 100644
index 0000000..f37eed7
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Reads the header of the PtNode array at ptNodeArrayPos.
+//
+// On success *outPtNodeCount receives the number of PtNodes in the array and
+// *outFirstPtNodePos the position right after the count, and true is returned. Returns false
+// without touching the outputs when ptNodeArrayPos is negative or not below the buffer's tail
+// position, or when the count that was read is negative.
+bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+        int *const outPtNodeCount, int *const outFirstPtNodePos) const {
+    const bool isInRange = ptNodeArrayPos >= 0 && ptNodeArrayPos < mBuffer->getTailPosition();
+    if (!isInRange) {
+        // Reading invalid position because of a bug or a broken dictionary.
+        AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
+                ptNodeArrayPos, mBuffer->getTailPosition());
+        ASSERT(false);
+        return false;
+    }
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodeArrayPos);
+    const uint8_t *const buf = mBuffer->getBuffer(inAdditionalBuffer);
+    // Positions in the additional buffer are shifted down by the original buffer size for the
+    // read and shifted back up before being reported.
+    const int basePos = inAdditionalBuffer ? mBuffer->getOriginalBufferSize() : 0;
+    int posInBuf = ptNodeArrayPos - basePos;
+    const int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+            buf, &posInBuf);
+    if (ptNodeCount < 0) {
+        AKLOGE("Invalid PtNode count in an array: %d.", ptNodeCount);
+        return false;
+    }
+    *outPtNodeCount = ptNodeCount;
+    *outFirstPtNodePos = posInBuf + basePos;
+    return true;
+}
+
+// Reads the forward link stored at forwordLinkPos.
+//
+// Returns false when forwordLinkPos is negative or not below the buffer's tail position.
+// Otherwise returns true and sets *outNextPtNodeArrayPos to forwordLinkPos plus the offset
+// that was read, or to NOT_A_DICT_POS when that offset is not a valid forward link position.
+bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+        int *const outNextPtNodeArrayPos) const {
+    const bool isInRange = forwordLinkPos >= 0 && forwordLinkPos < mBuffer->getTailPosition();
+    if (!isInRange) {
+        // Reading invalid position because of bug or broken dictionary.
+        AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
+                forwordLinkPos, mBuffer->getTailPosition());
+        ASSERT(false);
+        return false;
+    }
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(forwordLinkPos);
+    const uint8_t *const buf = mBuffer->getBuffer(inAdditionalBuffer);
+    // Positions in the additional buffer are shifted down by the original buffer size.
+    const int basePos = inAdditionalBuffer ? mBuffer->getOriginalBufferSize() : 0;
+    int posInBuf = forwordLinkPos - basePos;
+    const int linkOffset = DynamicPtReadingUtils::getForwardLinkPosition(buf, posInBuf);
+    // The stored value is an offset relative to the position of the link itself.
+    *outNextPtNodeArrayPos = DynamicPtReadingUtils::isValidForwardLinkPosition(linkOffset)
+            ? forwordLinkPos + linkOffset : NOT_A_DICT_POS;
+    return true;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h
new file mode 100644
index 0000000..b09e4f0
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H
+#define LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+} // namespace v402
+} // namespace backward
+class BufferWithExtendableBuffer;
+namespace backward {
+namespace v402 {
+
+// PtNodeArrayReader that reads PtNode array headers and forward links from a
+// BufferWithExtendableBuffer.
+class Ver4PtNodeArrayReader : public PtNodeArrayReader {
+ public:
+    // Stores the given buffer pointer; the buffer itself is not copied.
+    Ver4PtNodeArrayReader(const BufferWithExtendableBuffer *const buffer)
+            : mBuffer(buffer) {}
+
+    // Reads the PtNode count and the position of the first PtNode of the array at
+    // ptNodeArrayPos. Returns false when the position or the count is invalid.
+    virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+            int *const outPtNodeCount, int *const outFirstPtNodePos) const;
+
+    // Reads the forward link at forwordLinkPos and reports the position of the next PtNode
+    // array, or NOT_A_DICT_POS when there is none. Returns false for an invalid position.
+    virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+            int *const outNextPtNodeArrayPos) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver4PtNodeArrayReader);
+
+    // Buffer every read goes through; presumably owned elsewhere - TODO confirm lifetime.
+    const BufferWithExtendableBuffer *const mBuffer;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
new file mode 100644
index 0000000..3f09618
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -0,0 +1,205 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
+
+#include <climits>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+
+namespace latinime {
+
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+        DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
+                const char *const path, const int bufOffset, const int size,
+                const bool isUpdatable) {
+    // A path that names a directory is handed to the directory-dictionary loader.
+    if (FileUtils::existsDir(path)) {
+        return newPolicyForDirectoryDict(path, isUpdatable);
+    }
+    // Any other path is loaded as a single file, which is only possible without updates.
+    if (!isUpdatable) {
+        return newPolicyForFileDict(path, bufOffset, size);
+    }
+    AKLOGE("One file dictionaries don't support updating. path: %s", path);
+    ASSERT(false);
+    return nullptr;
+}
+
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ DictionaryStructureWithBufferPolicyFactory:: newPolicyForOnMemoryDict(
+ const int formatVersion, const std::vector<int> &locale,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
+ FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion); // Convert the raw int into the FORMAT_VERSION enum.
+ switch (dictFormatVersion) {
+ case FormatUtils::VERSION_4: { // VERSION_4 is built with the backward::v402 class set.
+ return newPolicyForOnMemoryV4Dict<backward::v402::Ver4DictConstants,
+ backward::v402::Ver4DictBuffers,
+ backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr,
+ backward::v402::Ver4PatriciaTriePolicy>(
+ dictFormatVersion, locale, attributeMap);
+ }
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ case FormatUtils::VERSION_4_DEV: { // These two versions share the non-backward Ver4 class set.
+ return newPolicyForOnMemoryV4Dict<Ver4DictConstants, Ver4DictBuffers,
+ Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>(
+ dictFormatVersion, locale, attributeMap);
+ }
+ default: // Every other version is logged and rejected.
+ AKLOGE("DICT: dictionary format %d is not supported for on memory dictionary",
+ formatVersion);
+ break;
+ }
+ return nullptr; // Reached only through the default case above.
+}
+
+template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryV4Dict(
+ const FormatUtils::FORMAT_VERSION formatVersion,
+ const std::vector<int> &locale,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
+ HeaderPolicy headerPolicy(formatVersion, locale, attributeMap); // Local object; NOTE(review): confirm createVer4DictBuffers() does not keep this pointer.
+ DictBuffersPtr dictBuffers = DictBuffers::createVer4DictBuffers(&headerPolicy,
+ DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
+ if (!DynamicPtWritingUtils::writeEmptyDictionary( // NOTE(review): dictBuffers is dereferenced without a null check.
+ dictBuffers->getWritableTrieBuffer(), 0 /* rootPos */)) {
+ AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
+ return nullptr;
+ }
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( // The buffers are moved into the new policy object.
+ new StructurePolicy(std::move(dictBuffers)));
+}
+
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ DictionaryStructureWithBufferPolicyFactory::newPolicyForDirectoryDict(
+ const char *const path, const bool isUpdatable) {
+ const int headerFilePathBufSize = PATH_MAX + 1 /* terminator */;
+ char headerFilePath[headerFilePathBufSize];
+ getHeaderFilePathInDictDir(path, headerFilePathBufSize, headerFilePath); // Fills headerFilePath with the header file's path inside |path|.
+ // Allocated buffer in MmappedBuffer::openBuffer() will be freed in the destructor of
+ // MmappedBufferPtr if the instance has the responsibility.
+ MmappedBuffer::MmappedBufferPtr mmappedBuffer =
+ MmappedBuffer::openBuffer(headerFilePath, isUpdatable);
+ if (!mmappedBuffer) {
+ return nullptr; // Opening the header file failed; no log and no assert on this path.
+ }
+ const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion( // Version is detected from the header file's bytes.
+ mmappedBuffer->getReadOnlyByteArrayView().data(),
+ mmappedBuffer->getReadOnlyByteArrayView().size());
+ switch (formatVersion) {
+ case FormatUtils::VERSION_2: // Rejected here: version 2 is only loaded by newPolicyForFileDict().
+ AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
+ break;
+ case FormatUtils::VERSION_4: { // VERSION_4 uses the backward::v402 class set.
+ return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
+ backward::v402::Ver4DictBuffers,
+ backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr,
+ backward::v402::Ver4PatriciaTriePolicy>(
+ headerFilePath, formatVersion, std::move(mmappedBuffer));
+ }
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ case FormatUtils::VERSION_4_DEV: { // These two versions use the non-backward Ver4 class set.
+ return newPolicyForV4Dict<Ver4DictConstants, Ver4DictBuffers,
+ Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>(
+ headerFilePath, formatVersion, std::move(mmappedBuffer));
+ }
+ default:
+ AKLOGE("DICT: dictionary format is unknown, bad magic number. path: %s", path);
+ break;
+ }
+ ASSERT(false); // Only the VERSION_2 and default cases fall through to here.
+ return nullptr;
+}
+
+template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ DictionaryStructureWithBufferPolicyFactory::newPolicyForV4Dict(
+ const char *const headerFilePath, const FormatUtils::FORMAT_VERSION formatVersion,
+ MmappedBuffer::MmappedBufferPtr &&mmappedBuffer) {
+ const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */;
+ char dictPath[dictDirPathBufSize]; // NOTE(review): array size is a runtime value, which relies on a compiler extension in C++.
+ if (!FileUtils::getFilePathWithoutSuffix(headerFilePath, // Strip the header extension to obtain dictPath.
+ DictConstants::HEADER_FILE_EXTENSION, dictDirPathBufSize, dictPath)) {
+ AKLOGE("Dictionary file name is not valid as a ver4 dictionary. header path: %s",
+ headerFilePath);
+ ASSERT(false);
+ return nullptr;
+ }
+ DictBuffersPtr dictBuffers = // Ownership of the mmapped header buffer is moved into the call.
+ DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer), formatVersion);
+ if (!dictBuffers || !dictBuffers->isValid()) { // Both a null result and an invalid buffer set are rejected.
+ AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s",
+ dictPath);
+ ASSERT(false);
+ return nullptr;
+ }
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( // The buffers are moved into the new policy object.
+ new StructurePolicy(std::move(dictBuffers)));
+}
+
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ DictionaryStructureWithBufferPolicyFactory::newPolicyForFileDict(
+ const char *const path, const int bufOffset, const int size) {
+ // Allocated buffer in MmappedBuffer::openBuffer() will be freed in the destructor of
+ // MmappedBufferPtr if the instance has the responsibility.
+ MmappedBuffer::MmappedBufferPtr mmappedBuffer(
+ MmappedBuffer::openBuffer(path, bufOffset, size, false /* isUpdatable */));
+ if (!mmappedBuffer) {
+ return nullptr; // Opening the file failed; no log and no assert on this path.
+ }
+ switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView().data(), // Version is detected from the file's bytes.
+ mmappedBuffer->getReadOnlyByteArrayView().size())) {
+ case FormatUtils::VERSION_2: // The only version accepted for a single-file dictionary.
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
+ new PatriciaTriePolicy(std::move(mmappedBuffer)));
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ case FormatUtils::VERSION_4:
+ case FormatUtils::VERSION_4_DEV: // All version 4 variants are rejected for a plain file.
+ AKLOGE("Given path is a file but the format is version 4. path: %s", path);
+ break;
+ default:
+ AKLOGE("DICT: dictionary format is unknown, bad magic number. path: %s", path);
+ break;
+ }
+ ASSERT(false); // Every case except VERSION_2 falls through to here.
+ return nullptr;
+}
+
+/* static */ void DictionaryStructureWithBufferPolicyFactory::getHeaderFilePathInDictDir(
+ const char *const dictDirPath, const int outHeaderFileBufSize,
+ char *const outHeaderFilePath) {
+ const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
+ char dictName[dictNameBufSize]; // NOTE(review): array size is a runtime value, which relies on a compiler extension in C++.
+ FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName); // dictName receives the basename of the directory path.
+ snprintf(outHeaderFilePath, outHeaderFileBufSize, "%s/%s%s", dictDirPath, // Result: <dictDirPath>/<dictName><header extension>.
+ dictName, Ver4DictConstants::HEADER_FILE_EXTENSION); // Always the non-backward Ver4DictConstants extension.
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
new file mode 100644
index 0000000..4461dcb
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
+#define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+class DictionaryStructureWithBufferPolicyFactory { // All members are static; the constructors are disallowed below.
+ public:
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr // Loads a dictionary from |path|, which may be a directory or a single file.
+ newPolicyForExistingDictFile(const char *const path, const int bufOffset,
+ const int size, const bool isUpdatable);
+
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr // Creates an empty in-memory dictionary for the given format version.
+ newPolicyForOnMemoryDict(const int formatVersion, const std::vector<int> &locale,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
+
+ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr // Template parameters select the ver4 class set to instantiate.
+ newPolicyForOnMemoryV4Dict(const FormatUtils::FORMAT_VERSION formatVersion,
+ const std::vector<int> &locale,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
+
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr // Loader used when the path is a directory.
+ newPolicyForDirectoryDict(const char *const path, const bool isUpdatable);
+
+ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr newPolicyForV4Dict( // Takes the already opened header buffer by rvalue reference.
+ const char *const headerFilePath, const FormatUtils::FORMAT_VERSION formatVersion,
+ MmappedBuffer::MmappedBufferPtr &&mmappedBuffer);
+
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr // Loader used when the path is not a directory.
+ newPolicyForFileDict(const char *const path, const int bufOffset, const int size);
+
+ static void getHeaderFilePathInDictDir(const char *const dirPath, // Writes the header file path into outHeaderFilePath.
+ const int outHeaderFileBufSize, char *const outHeaderFilePath);
+};
+} // namespace latinime
+#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp
new file mode 100644
index 0000000..acc5a23
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_ADDRESS_TYPE =
+ 0x30; // Selects the address-type bits that getBigramAddressAndAdvancePosition() switches on.
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; // Address is read as a uint8.
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; // Address is read as a uint16.
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; // Address is read as a uint24.
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; // When set, the offset is subtracted rather than added.
+// Flag for presence of more attributes, i.e. another entry follows this one in the list.
+const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRIBUTE_HAS_NEXT =
+ 0x80;
+// Mask for attribute probability, stored on 4 bits inside the flags byte.
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+
+/* static */ bool BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ const uint8_t *const bigramsBuf, const int bufSize, BigramFlags *const outBigramFlags,
+ int *const outTargetPtNodePos, int *const bigramEntryPos) {
+ if (bufSize <= *bigramEntryPos) { // Only the upper bound of the start position is checked.
+ AKLOGE("Read invalid pos in getBigramEntryPropertiesAndAdvancePosition(). bufSize: %d, "
+ "bigramEntryPos: %d.", bufSize, *bigramEntryPos);
+ return false;
+ }
+ const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, // The entry starts with one flags byte.
+ bigramEntryPos);
+ if (outBigramFlags) { // Both out-pointers are optional; null means the caller does not want the value.
+ *outBigramFlags = bigramFlags;
+ }
+ const int targetPos = getBigramAddressAndAdvancePosition(bigramsBuf, bigramFlags, // NOTE(review): the address bytes are read without re-checking bufSize.
+ bigramEntryPos);
+ if (outTargetPtNodePos) {
+ *outTargetPtNodePos = targetPos;
+ }
+ return true; // *bigramEntryPos now points just past this entry.
+}
+
+/* static */ bool BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
+        const int bufSize, int *const bigramListPos) {
+    BigramFlags flags;  // Written by each successful read below before hasNext() looks at it.
+    do {  // Consume entries one by one, advancing *bigramListPos past each of them.
+        if (!getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, bufSize, &flags,
+                nullptr /* outTargetPtNodePos */, bigramListPos)) {
+            return false;  // Position ran outside the buffer; the list could not be skipped.
+        }
+    } while(hasNext(flags));  // Stop after the entry whose has-next flag is clear.
+    return true;
+}
+
+/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
+        const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
+    // The stored offset is relative to where the address field itself begins.
+    const int basePos = *pos;
+    int delta = 0;
+    switch (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+            delta = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
+            break;
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+            delta = ByteArrayUtils::readUint16AndAdvancePosition(bigramsBuf, pos);
+            break;
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+            delta = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
+            break;
+    }
+    // With no address-type bit set, nothing is read: *pos is unchanged and delta stays 0.
+    // The negative-offset flag decides whether the target lies before or after basePos.
+    // Returns the resulting absolute position.
+    return isOffsetNegative(flags) ? basePos - delta : basePos + delta;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h
new file mode 100644
index 0000000..3db2c3d
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
+#define LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
+
+#include <cstdint>
+#include <cstdlib>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class BigramListReadWriteUtils { // Static helpers for reading bigram list entries from a byte buffer.
+public:
+ typedef uint8_t BigramFlags; // One flags byte precedes each entry's address.
+
+ static bool getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf, // Reads one entry; either out-pointer may be null.
+ const int bufSize, BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
+ int *const bigramEntryPos);
+
+ static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
+ return flags & MASK_ATTRIBUTE_PROBABILITY; // Keeps only the bits selected by the probability mask.
+ }
+
+ static AK_FORCE_INLINE bool hasNext(const BigramFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0; // True when the has-next bit is set.
+ }
+
+ // Bigrams reading methods
+ static bool skipExistingBigrams(const uint8_t *const bigramsBuf, const int bufSize, // Advances *bigramListPos past a whole list.
+ int *const bigramListPos);
+
+private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
+
+ static const BigramFlags MASK_ATTRIBUTE_ADDRESS_TYPE; // The values of these constants are defined in the .cpp file.
+ static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+ static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+ static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
+ static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
+ static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
+
+ static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0; // True when the offset-negative bit is set.
+ }
+
+ static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf, // Decodes the address that follows the flags byte.
+ const BigramFlags flags, int *const pos);
+};
+} // namespace latinime
+#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
new file mode 100644
index 0000000..ba13b80
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
+
+#include "third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+
+namespace latinime {
+
+bool DynamicPtGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
+ // children.
+ bool isUselessPtNode = !ptNodeParams->isTerminal(); // Initial guess; refined by the checks below.
+ if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
+ bool needsToKeepPtNode = true;
+ if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+ ptNodeParams, &needsToKeepPtNode)) {
+ AKLOGE("Cannot update PtNode probability or get needs to keep PtNode after GC.");
+ return false;
+ }
+ if (!needsToKeepPtNode) { // The writer reported that this terminal need not survive the GC.
+ isUselessPtNode = true;
+ }
+ }
+ if (mChildrenValue > 0) { // mChildrenValue is the count popped in onAscend(): kept PtNodes in the array just left.
+ isUselessPtNode = false; // A kept child keeps this PtNode alive regardless of the result above.
+ } else if (ptNodeParams->isTerminal()) {
+ // Remove children as all children are useless.
+ if (!mPtNodeWriter->updateChildrenPosition(ptNodeParams,
+ NOT_A_DICT_POS /* newChildrenPosition */)) {
+ return false;
+ }
+ }
+ if (isUselessPtNode) {
+ // Current PtNode is no longer needed. Mark it as deleted.
+ if (!mPtNodeWriter->markPtNodeAsDeleted(ptNodeParams)) {
+ return false;
+ }
+ } else {
+ mValueStack.back() += 1; // Count this kept PtNode for the array currently on top of the stack.
+ if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
+ mValidUnigramCount += 1; // Only kept terminals that do not represent non-word info are counted.
+ }
+ }
+ return true;
+}
+
+bool DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
+        ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+    if (ptNodeParams->isDeleted()) {
+        return true;  // Deleted PtNodes are left untouched and contribute no entries.
+    }
+    int count = 0;  // Out-parameter filled in by the writer.
+    if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams, &count)) {
+        return false;
+    }
+    mValidBigramEntryCount += count;
+    return true;
+}
+
+// Writes dummy PtNode array size when the head of PtNode array is read.
+bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onDescend(const int ptNodeArrayPos) {
+ mValidPtNodeCount = 0; // Restart the per-array counter. NOTE(review): single member, so a nested onDescend() overwrites it.
+ int writingPos = mBufferToWrite->getTailPosition();
+ mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert( // Record old array position -> new array position.
+ PtNodeWriter::PtNodeArrayPositionRelocationMap::value_type(ptNodeArrayPos, writingPos));
+ // Writes dummy PtNode array size because arrays can have a forward link or needless PtNodes.
+ // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
+ mPtNodeArraySizeFieldPos = writingPos;
+ return DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ mBufferToWrite, 0 /* arraySize */, &writingPos);
+}
+
+// Terminates the PtNode array being written and back-fills its real size.
+bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+        ::onReadingPtNodeArrayTail() {
+    int tailPos = mBufferToWrite->getTailPosition();
+    // The array terminal is a forward link field holding NOT_A_DICT_POS.
+    const bool wroteTerminal = DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+            mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &tailPos);
+    if (!wroteTerminal) {
+        return false;
+    }
+    // Overwrite the size field whose position was recorded in onDescend() with the number of
+    // PtNodes counted in mValidPtNodeCount. The member's own address is passed as the
+    // position, so the callee may advance mPtNodeArraySizeFieldPos.
+    return DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+            mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos);
+}
+
+// Records where each PtNode moves to (or that it is dropped) and copies live PtNodes out.
+bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+        ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+    const bool isDeleted = ptNodeParams->isDeleted();
+    // Deleted PtNodes are mapped to NOT_A_DICT_POS; live ones to the current buffer tail.
+    int newPos = isDeleted ? NOT_A_DICT_POS : mBufferToWrite->getTailPosition();
+    mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+            PtNodeWriter::PtNodePositionRelocationMap::value_type(
+                    ptNodeParams->getHeadPos(), newPos));
+    if (isDeleted) {
+        // Nothing is written to the new buffer for a deleted PtNode.
+        return true;
+    }
+    mValidPtNodeCount++;
+    // The count above is what onReadingPtNodeArrayTail() later stores as the array size.
+    // Writes the current PtNode at the position that was just recorded in the map.
+    return mPtNodeWriter->writePtNodeAndAdvancePosition(ptNodeParams, &newPos);
+}
+
+bool DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
+        ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+    // Let the writer update this PtNode's position fields using the relocation map; it reports
+    // a bigram count through the out-parameter.
+    int nodeBigramCount = 0;
+    const bool updated = mPtNodeWriter->updateAllPositionFields(ptNodeParams,
+            mDictPositionRelocationMap, &nodeBigramCount);
+    if (!updated) {
+        return false;
+    }
+    mBigramCount += nodeBigramCount;
+    mUnigramCount += ptNodeParams->isTerminal() ? 1 : 0;
+    return true;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h
new file mode 100644
index 0000000..9012d9a
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H
+#define LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+class PtNodeParams;
+
+class DynamicPtGcEventListeners { // Container for the traversal listeners below; not instantiable itself.
+ public:
+ // Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
+ // not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
+ // TODO: Concatenate non-terminal PtNodes.
+ class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ : public DynamicPtReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ PtNodeWriter *const ptNodeWriter)
+ : mPtNodeWriter(ptNodeWriter), mValueStack(), mChildrenValue(0),
+ mValidUnigramCount(0) {}
+
+ ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
+
+ bool onAscend() { // Pops the finished array's counter into mChildrenValue.
+ if (mValueStack.empty()) {
+ return false; // An ascend without a matching descend is reported as a failure.
+ }
+ mChildrenValue = mValueStack.back();
+ mValueStack.pop_back();
+ return true;
+ }
+
+ bool onDescend(const int ptNodeArrayPos) { // Pushes a fresh counter for the array being entered.
+ mValueStack.push_back(0);
+ mChildrenValue = 0;
+ return true;
+ }
+
+ bool onReadingPtNodeArrayTail() { return true; }
+
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+ int getValidUnigramCount() const {
+ return mValidUnigramCount;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(
+ TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
+
+ PtNodeWriter *const mPtNodeWriter;
+ std::vector<int> mValueStack; // One counter per PtNode array that has been entered and not yet left.
+ int mChildrenValue; // Counter of the array most recently left; reset to 0 on every descend.
+ int mValidUnigramCount;
+ };
+
+ // Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
+ // entries.
+ class TraversePolicyToUpdateBigramProbability
+ : public DynamicPtReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToUpdateBigramProbability(PtNodeWriter *const ptNodeWriter)
+ : mPtNodeWriter(ptNodeWriter), mValidBigramEntryCount(0) {}
+
+ bool onAscend() { return true; }
+
+ bool onDescend(const int ptNodeArrayPos) { return true; }
+
+ bool onReadingPtNodeArrayTail() { return true; }
+
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+ int getValidBigramEntryCount() const {
+ return mValidBigramEntryCount;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability);
+
+ PtNodeWriter *const mPtNodeWriter;
+ int mValidBigramEntryCount; // Sum of the counts the writer reports for non-deleted PtNodes.
+ };
+
+ class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer // Writes non-deleted PtNodes into bufferToWrite and records relocations.
+ : public DynamicPtReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
+ PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const bufferToWrite,
+ PtNodeWriter::DictPositionRelocationMap *const dictPositionRelocationMap)
+ : mPtNodeWriter(ptNodeWriter), mBufferToWrite(bufferToWrite),
+ mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
+ mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
+
+ bool onAscend() { return true; }
+
+ bool onDescend(const int ptNodeArrayPos);
+
+ bool onReadingPtNodeArrayTail();
+
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
+
+ PtNodeWriter *const mPtNodeWriter;
+ BufferWithExtendableBuffer *const mBufferToWrite;
+ PtNodeWriter::DictPositionRelocationMap *const mDictPositionRelocationMap; // Filled with old position -> new position entries.
+ int mValidPtNodeCount; // PtNodes written since the last onDescend().
+ int mPtNodeArraySizeFieldPos; // Position of the size field written by the last onDescend().
+ };
+
+ class TraversePolicyToUpdateAllPositionFields // Applies the relocation map to every visited PtNode via the writer.
+ : public DynamicPtReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToUpdateAllPositionFields(PtNodeWriter *const ptNodeWriter,
+ const PtNodeWriter::DictPositionRelocationMap *const dictPositionRelocationMap)
+ : mPtNodeWriter(ptNodeWriter),
+ mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0),
+ mBigramCount(0) {};
+
+ bool onAscend() { return true; }
+
+ bool onDescend(const int ptNodeArrayPos) { return true; }
+
+ bool onReadingPtNodeArrayTail() { return true; }
+
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+ int getUnigramCount() const {
+ return mUnigramCount;
+ }
+
+ int getBigramCount() const {
+ return mBigramCount;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
+
+ PtNodeWriter *const mPtNodeWriter;
+ const PtNodeWriter::DictPositionRelocationMap *const mDictPositionRelocationMap; // Read-only here, unlike in the writing policy above.
+ int mUnigramCount; // Incremented once per visited terminal PtNode.
+ int mBigramCount; // Sum of the bigram counts reported by the writer.
+ };
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtGcEventListeners);
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
new file mode 100644
index 0000000..2764d0d
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+namespace latinime {
+
+// Safety limits for invalid or malicious dictionaries; the last one caps pushReadingStateToStack().
+const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
+
+bool DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions::onVisitingPtNode(
+        const PtNodeParams *const ptNodeParams) {
+    // Record the head position of every terminal that is not deleted; never stop the traversal.
+    const bool isLiveTerminal = ptNodeParams->isTerminal() && !ptNodeParams->isDeleted();
+    if (isLiveTerminal) { mTerminalPositions->push_back(ptNodeParams->getHeadPos()); }
+    return true;
+}
+
+// Visits all PtNodes in post-order depth first manner. Returns false if a callback or read fails.
+// For example, visits c -> b -> y -> x -> a for the following dictionary:
+// a _ b _ c
+//   \ x _ y
+bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
+        TraversingEventListener *const listener) {
+    bool alreadyVisitedChildren = false;
+    // Descend from the root to the root PtNode array.
+    if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
+        return false;
+    }
+    while (!isEnd()) {
+        const PtNodeParams ptNodeParams(getPtNodeParams());
+        if (!ptNodeParams.isValid()) {
+            break;
+        }
+        if (!alreadyVisitedChildren && !ptNodeParams.hasChildren()) {
+            alreadyVisitedChildren = true;  // Leaf: it is reported on the next iteration.
+        } else if (!alreadyVisitedChildren) {
+            if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
+                return false;
+            }
+            pushReadingStateToStack();
+            if (isEnd()) {
+                // The push overflowed and ended the reading state. readChildNode() would overwrite
+                // that position, so cyclic children links could keep this loop running forever.
+                return false;
+            }
+            readChildNode(ptNodeParams);  // Move to the first child.
+        } else {
+            if (!listener->onVisitingPtNode(&ptNodeParams)) {
+                return false;
+            }
+            readNextSiblingNode(ptNodeParams);
+            if (isEnd()) {
+                // All PtNodes in current linked PtNode arrays are visited; return to the parent.
+                if (!listener->onReadingPtNodeArrayTail()) {
+                    return false;
+                }
+                if (mReadingStateStack.size() <= 0) {
+                    break;
+                }
+                if (!listener->onAscend()) {
+                    return false;
+                }
+                popReadingStateFromStack();
+                alreadyVisitedChildren = true;
+            } else {
+                alreadyVisitedChildren = false;  // Process sibling PtNode.
+            }
+        }
+    }
+    // Ascend from the root PtNode array to the root.
+    if (!listener->onAscend()) {
+        return false;
+    }
+    return !isError();
+}
+
+// Visits all PtNodes in PtNode array level pre-order depth first manner, which is the same order
+// that PtNodes are written in the dictionary buffer. Returns false if a callback or read fails.
+// For example, visits a -> b -> x -> c -> y for the following dictionary:
+// a _ b _ c
+//   \ x _ y
+bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ TraversingEventListener *const listener) {
+ bool alreadyVisitedAllPtNodesInArray = false;  // Every PtNode of the current array was reported.
+ bool alreadyVisitedChildren = false;  // The children of the current PtNode were handled.
+ // Descend from the root to the root PtNode array.
+ if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
+ return false;
+ }
+ if (isEnd()) {
+ // Empty dictionary. Needs to notify the listener of the tail of empty PtNode array.
+ if (!listener->onReadingPtNodeArrayTail()) {
+ return false;
+ }
+ }
+ pushReadingStateToStack();  // Lets the loop come back to the head of the root PtNode array.
+ while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ if (alreadyVisitedAllPtNodesInArray) {
+ if (alreadyVisitedChildren) {
+ // Move to next sibling PtNode's children.
+ readNextSiblingNode(ptNodeParams);
+ if (isEnd()) {
+ // Return to the parent PTNode.
+ if (!listener->onAscend()) {
+ return false;
+ }
+ if (mReadingStateStack.size() <= 0) {
+ break;
+ }
+ popReadingStateFromStack();
+ alreadyVisitedChildren = true;
+ alreadyVisitedAllPtNodesInArray = true;
+ } else {
+ alreadyVisitedChildren = false;
+ }
+ } else {
+ if (ptNodeParams.hasChildren()) {
+ // Move to the first child.
+ if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
+ return false;
+ }
+ pushReadingStateToStack();  // Saves the parent PtNode, restored by the pop above.
+ readChildNode(ptNodeParams);
+ // Push state to return the head of PtNode array.
+ pushReadingStateToStack();  // On overflow this ends the reading state and thus the loop.
+ alreadyVisitedAllPtNodesInArray = false;
+ alreadyVisitedChildren = false;
+ } else {
+ alreadyVisitedChildren = true;
+ }
+ }
+ } else {
+ // First pass over an array: report each PtNode in the order it is stored.
+ if (!listener->onVisitingPtNode(&ptNodeParams)) {
+ return false;
+ }
+ readNextSiblingNode(ptNodeParams);
+ if (isEnd()) {
+ if (!listener->onReadingPtNodeArrayTail()) {
+ return false;
+ }
+ // Return to the head of current PtNode array.
+ popReadingStateFromStack();
+ alreadyVisitedAllPtNodesInArray = true;
+ }
+ }
+ }
+ popReadingStateFromStack();
+ // Ascend from the root PtNode array to the root.
+ if (!listener->onAscend()) {
+ return false;
+ }
+ return !isError();
+}
+
+int DynamicPtReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
+        const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) {
+    // The word is rebuilt by climbing parent links from the terminal PtNode, so the scratch
+    // buffer receives it back to front and is flipped into outCodePoints at the very end.
+    int backwardCodePoints[maxCodePointCount];
+    const PtNodeParams startParams(getPtNodeParams());
+    if (!isValidTerminalNode(startParams)) {
+        // The helper is not positioned on a terminal PtNode that is still alive.
+        *outUnigramProbability = NOT_A_PROBABILITY;
+        return 0;
+    }
+    // Report the terminal's probability right away; every failure path below overwrites it.
+    *outUnigramProbability = startParams.getProbability();
+    int gatheredCount = 0;
+    while (!isEnd()) {
+        const PtNodeParams currentParams(getPtNodeParams());
+        gatheredCount = getTotalCodePointCount(currentParams);
+        const bool exceedsCapacity = gatheredCount > maxCodePointCount;
+        if (!currentParams.isValid() || exceedsCapacity) {
+            // Broken PtNode on the way up, or the word does not fit into the caller's buffer.
+            *outUnigramProbability = NOT_A_PROBABILITY;
+            return 0;
+        }
+        // Append this PtNode's code points, last one first, behind what was gathered so far.
+        const int writeOffset = getPrevTotalCodePointCount();
+        fetchMergedNodeCodePointsInReverseOrder(currentParams, writeOffset, backwardCodePoints);
+        // Climb one level toward the root.
+        readParentNode(currentParams);
+    }
+    if (isError()) {
+        // The node position or the dictionary is invalid.
+        *outUnigramProbability = NOT_A_PROBABILITY;
+        return 0;
+    }
+    // Flip the gathered code points into reading order: walk the output forwards and the
+    // scratch buffer backwards.
+    for (int dst = 0, src = gatheredCount - 1; dst < gatheredCount; ++dst, --src) {
+        outCodePoints[dst] = backwardCodePoints[src];
+    }
+    return gatheredCount;
+}
+
+// Searches for |inWord| starting from the current reading position. Returns the head position
+// of the matching terminal PtNode, or NOT_A_DICT_POS when the word is absent or |length| <= 0.
+int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
+        const int length, const bool forceLowerCaseSearch) {
+    if (length <= 0) {
+        return NOT_A_DICT_POS;  // Nothing to match; also keeps the array bound below positive.
+    }
+    int searchCodePoints[length];
+    for (int i = 0; i < length; ++i) {
+        searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
+    }
+    while (!isEnd()) {
+        const PtNodeParams ptNodeParams(getPtNodeParams());
+        const int matchedCodePointCount = getPrevTotalCodePointCount();
+        if (getTotalCodePointCount(ptNodeParams) > length
+                || !isMatchedCodePoint(ptNodeParams, 0 /* index */,
+                        searchCodePoints[matchedCodePointCount])) {
+            // Current node has too many code points or its first code point is different from
+            // target code point. Skip this node and read the next sibling node.
+            readNextSiblingNode(ptNodeParams);
+            continue;
+        }
+        // Check following merged node code points.
+        const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+        for (int j = 1; j < nodeCodePointCount; ++j) {
+            if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) {
+                // Different code point is found. The given word is not included in the dictionary.
+                return NOT_A_DICT_POS;
+            }
+        }
+        if (length == getTotalCodePointCount(ptNodeParams)) {
+            // All characters are matched; only a terminal PtNode represents the word itself.
+            return ptNodeParams.isTerminal() ? ptNodeParams.getHeadPos() : NOT_A_DICT_POS;
+        }
+        if (!ptNodeParams.hasChildren()) {
+            return NOT_A_DICT_POS;
+        }
+        // Advance to the children nodes.
+        readChildNode(ptNodeParams);
+    }
+    // Every candidate was read without a match (otherwise we would have returned above).
+    return NOT_A_DICT_POS;
+}
+
+// Reads the header of the PtNode array at the current position and moves to its first PtNode,
+// following forward links past empty arrays. Counts nodes and arrays to avoid an infinite loop.
+void DynamicPtReadingHelper::nextPtNodeArray() {
+    int ptNodeCountInArray = 0;
+    int firstPtNodePos = NOT_A_DICT_POS;
+    if (!mPtNodeArrayReader->readPtNodeArrayInfoAndReturnIfValid(
+            mReadingState.mPos, &ptNodeCountInArray, &firstPtNodePos)) {
+        mIsError = true;
+        mReadingState.mPos = NOT_A_DICT_POS;
+        return;
+    }
+    mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
+    mReadingState.mRemainingPtNodeCountInThisArray = ptNodeCountInArray;
+    mReadingState.mPos = firstPtNodePos;
+    // Count up nodes and node arrays to avoid infinite loop.
+    mReadingState.mTotalPtNodeIndexInThisArrayChain +=
+            mReadingState.mRemainingPtNodeCountInThisArray;
+    mReadingState.mPtNodeArrayIndexInThisArrayChain++;
+    if (mReadingState.mRemainingPtNodeCountInThisArray < 0
+            || mReadingState.mTotalPtNodeIndexInThisArrayChain
+                    > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
+            || mReadingState.mPtNodeArrayIndexInThisArrayChain
+                    > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
+        // Invalid dictionary: negative node count or one of the chain limits was exceeded.
+        AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d, "
+                "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
+                mReadingState.mRemainingPtNodeCountInThisArray,
+                mReadingState.mTotalPtNodeIndexInThisArrayChain,
+                MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
+                mReadingState.mPtNodeArrayIndexInThisArrayChain,
+                MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
+        ASSERT(false);
+        mIsError = true;
+        mReadingState.mPos = NOT_A_DICT_POS;
+        return;
+    }
+    if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
+        // Empty node array. Try following forward link.
+        followForwardLink();
+    }
+}
+
+// Reads the forward link at the current position and continues with the array it points to.
+void DynamicPtReadingHelper::followForwardLink() {
+    int linkedArrayPos = NOT_A_DICT_POS;
+    const bool linkWasRead = mPtNodeArrayReader->readForwardLinkAndReturnIfValid(
+            mReadingState.mPos, &linkedArrayPos);
+    if (!linkWasRead) {
+        // Unreadable link: flag the error and stop reading.
+        mIsError = true;
+        mReadingState.mPos = NOT_A_DICT_POS;
+        return;
+    }
+    mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
+    mReadingState.mPos = linkedArrayPos;
+    if (linkedArrayPos != NOT_A_DICT_POS) {
+        // A further array is chained on; load its header.
+        nextPtNodeArray();
+    }
+    // Otherwise mPos is now NOT_A_DICT_POS, i.e. all node arrays have been read.
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
new file mode 100644
index 0000000..c6a6091
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PT_READING_HELPER_H
+#define LATINIME_DYNAMIC_PT_READING_HELPER_H
+
+#include <cstddef>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class DictionaryShortcutsStructurePolicy;
+class PtNodeArrayReader;
+
+/*
+ * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
+ * dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
+ */
+class DynamicPtReadingHelper {
+ public:
+    class TraversingEventListener {
+     public:
+        virtual ~TraversingEventListener() {}
+
+        // Called when a traversal moves back up a level. Returning false aborts the traversal.
+        virtual bool onAscend() = 0;
+
+        // Called before a traversal enters the PtNode array at |ptNodeArrayPos|. False aborts.
+        virtual bool onDescend(const int ptNodeArrayPos) = 0;
+
+        // Called once the last PtNode of a chain of linked PtNode arrays is read. False aborts.
+        virtual bool onReadingPtNodeArrayTail() = 0;
+
+        // Called for every PtNode a traversal visits. Returning false aborts the traversal.
+        virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0;
+
+     protected:
+        TraversingEventListener() {}
+
+     private:
+        DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
+    };
+
+    class TraversePolicyToGetAllTerminalPtNodePositions : public TraversingEventListener {
+     public:
+        TraversePolicyToGetAllTerminalPtNodePositions(std::vector<int> *const terminalPositions)
+                : mTerminalPositions(terminalPositions) {}
+        bool onAscend() { return true; }
+        bool onDescend(const int ptNodeArrayPos) { return true; }
+        bool onReadingPtNodeArrayTail() { return true; }
+        bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+     private:
+        DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToGetAllTerminalPtNodePositions);
+
+        std::vector<int> *const mTerminalPositions;
+    };
+
+    DynamicPtReadingHelper(const PtNodeReader *const ptNodeReader,
+            const PtNodeArrayReader *const ptNodeArrayReader)
+            : mIsError(false), mReadingState(), mPtNodeReader(ptNodeReader),
+              mPtNodeArrayReader(ptNodeArrayReader), mReadingStateStack() {}
+
+    ~DynamicPtReadingHelper() {}
+
+    AK_FORCE_INLINE bool isError() const {
+        return mIsError;
+    }
+
+    AK_FORCE_INLINE bool isEnd() const {
+        return mReadingState.mPos == NOT_A_DICT_POS;
+    }
+
+    // Initialize reading state with the head position of a PtNode array.
+    AK_FORCE_INLINE void initWithPtNodeArrayPos(const int ptNodeArrayPos) {
+        if (ptNodeArrayPos == NOT_A_DICT_POS) {
+            mReadingState.mPos = NOT_A_DICT_POS;
+        } else {
+            mIsError = false;
+            mReadingState.mPos = ptNodeArrayPos;
+            mReadingState.mTotalCodePointCountSinceInitialization = 0;
+            mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
+            mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
+            mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+            mReadingStateStack.clear();
+            nextPtNodeArray();
+        }
+    }
+
+    // Initialize reading state with the head position of a node.
+    AK_FORCE_INLINE void initWithPtNodePos(const int ptNodePos) {
+        if (ptNodePos == NOT_A_DICT_POS) {
+            mReadingState.mPos = NOT_A_DICT_POS;
+        } else {
+            mIsError = false;
+            mReadingState.mPos = ptNodePos;
+            mReadingState.mRemainingPtNodeCountInThisArray = 1;
+            mReadingState.mTotalCodePointCountSinceInitialization = 0;
+            mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
+            mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
+            mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+            mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
+            mReadingStateStack.clear();
+        }
+    }
+
+    AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const {
+        if (isEnd()) {
+            return PtNodeParams();
+        }
+        return mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(mReadingState.mPos);
+    }
+
+    AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const {
+        return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal();
+    }
+
+    AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index,
+            const int codePoint) const {
+        return ptNodeParams.getCodePoints()[index] == codePoint;
+    }
+
+    // Returns the code point count excluding the last read node's code points.
+    AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
+        return mReadingState.mTotalCodePointCountSinceInitialization;
+    }
+
+    // Returns the code point count including the last read node's code points.
+    AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
+        return mReadingState.mTotalCodePointCountSinceInitialization
+                + ptNodeParams.getCodePointCount();
+    }
+
+    AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams,
+            const int index, int *const outCodePoints) const {
+        const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+        const int *const nodeCodePoints = ptNodeParams.getCodePoints();
+        for (int i = 0; i < nodeCodePointCount; ++i) {
+            outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i];
+        }
+    }
+
+    AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) {
+        mReadingState.mRemainingPtNodeCountInThisArray -= 1;
+        mReadingState.mPos = ptNodeParams.getSiblingNodePos();
+        if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
+            // All nodes in the current node array have been read.
+            followForwardLink();
+        }
+    }
+
+    // Read the first child node of the current node.
+    AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) {
+        if (ptNodeParams.hasChildren()) {
+            mReadingState.mTotalCodePointCountSinceInitialization +=
+                    ptNodeParams.getCodePointCount();
+            mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
+            mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
+            mReadingState.mPos = ptNodeParams.getChildrenPos();
+            mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+            // Read children node array.
+            nextPtNodeArray();
+        } else {
+            mReadingState.mPos = NOT_A_DICT_POS;
+        }
+    }
+
+    // Read the parent node of the current node.
+    AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) {
+        if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) {
+            mReadingState.mTotalCodePointCountSinceInitialization +=
+                    ptNodeParams.getCodePointCount();
+            mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
+            mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
+            mReadingState.mRemainingPtNodeCountInThisArray = 1;
+            mReadingState.mPos = ptNodeParams.getParentPos();
+            mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+            mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
+        } else {
+            mReadingState.mPos = NOT_A_DICT_POS;
+        }
+    }
+
+    AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
+        return mReadingState.mPosOfLastForwardLinkField;
+    }
+
+    AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
+        return mReadingState.mPosOfThisPtNodeArrayHead;
+    }
+
+    bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
+
+    bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+            TraversingEventListener *const listener);
+
+    int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
+            int *const outCodePoints, int *const outUnigramProbability);
+
+    int getTerminalPtNodePositionOfWord(const int *const inWord, const int length,
+            const bool forceLowerCaseSearch);
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(DynamicPtReadingHelper);
+
+    // This class encapsulates the reading state of a position in the dictionary. It points at a
+    // specific PtNode in the dictionary.
+    class PtNodeReadingState {
+     public:
+        // Note that the implicit copy constructor and assignment operator of this class are
+        // relied upon so that instances can be stored in a std::vector.
+        PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0),
+                mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0),
+                mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
+                mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {}
+
+        int mPos;
+        // Remaining node count in the current array.
+        int mRemainingPtNodeCountInThisArray;
+        int mTotalCodePointCountSinceInitialization;
+        // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
+        int mTotalPtNodeIndexInThisArrayChain;
+        // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
+        // PtNode arrays.
+        int mPtNodeArrayIndexInThisArrayChain;
+        int mPosOfLastForwardLinkField;
+        int mPosOfThisPtNodeArrayHead;
+    };
+
+    static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
+    static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
+    static const size_t MAX_READING_STATE_STACK_SIZE;
+
+    // TODO: Introduce error code to track what caused the error.
+    bool mIsError;
+    PtNodeReadingState mReadingState;
+    const PtNodeReader *const mPtNodeReader;
+    const PtNodeArrayReader *const mPtNodeArrayReader;
+    std::vector<PtNodeReadingState> mReadingStateStack;
+
+    void nextPtNodeArray();
+
+    void followForwardLink();
+
+    AK_FORCE_INLINE void pushReadingStateToStack() {  // On overflow: sets the error, ends reading.
+        if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
+            AKLOGI("Reading state stack overflow. Max size: %zu", MAX_READING_STATE_STACK_SIZE);
+            ASSERT(false);
+            mIsError = true;
+            mReadingState.mPos = NOT_A_DICT_POS;
+        } else {
+            mReadingStateStack.push_back(mReadingState);
+        }
+    }
+
+    AK_FORCE_INLINE void popReadingStateFromStack() {  // With an empty stack: ends reading.
+        if (mReadingStateStack.empty()) {
+            mReadingState.mPos = NOT_A_DICT_POS;
+        } else {
+            mReadingState = mReadingStateStack.back();
+            mReadingStateStack.pop_back();
+        }
+    }
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PT_READING_HELPER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp
new file mode 100644
index 0000000..b3b069f
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::MASK_MOVED = 0xC0;  // top two bits
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;  // 11
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_MOVED = 0x40;  // 01
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_DELETED = 0x80;  // 10
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_WILL_BECOME_NON_TERMINAL = 0x00;
+
+// TODO: Make DICT_OFFSET_ZERO_OFFSET = 0.
+// Currently, DICT_OFFSET_INVALID is 0 on the Java side but an offset can be 0 during GC. So the
+// maximum offset value, 0x7FFFFF, is used to represent a 0 offset.
+const int DynamicPtReadingUtils::DICT_OFFSET_INVALID = 0;
+const int DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
+
+/* static */ int DynamicPtReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
+        const int pos) {
+    int cursor = pos;  // Scratch copy: the reader advances it, |pos| itself stays untouched.
+    return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &cursor);
+}
+
+/* static */ int DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);  // raw offset, not yet decoded
+}
+
+/* static */ int DynamicPtReadingUtils::getParentPtNodePos(const int parentOffset,
+        const int ptNodePos) {
+    // An invalid offset means that no parent position can be derived.
+    if (parentOffset == DICT_OFFSET_INVALID) {
+        return NOT_A_DICT_POS;
+    }
+    // DICT_OFFSET_ZERO_OFFSET encodes a distance of zero; any other value is the distance itself.
+    const int distance = (parentOffset == DICT_OFFSET_ZERO_OFFSET) ? 0 : parentOffset;
+    return distance + ptNodePos;
+}
+
+/* static */ int DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
+        const uint8_t *const buffer, int *const pos) {
+    // The stored value is relative to where the field itself starts, so remember that position
+    // before the read moves *pos.
+    const int fieldStartPos = *pos;
+    const int storedOffset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
+    if (storedOffset == DICT_OFFSET_INVALID) {
+        // The PtNode does not have children.
+        return NOT_A_DICT_POS;
+    }
+    // DICT_OFFSET_ZERO_OFFSET stands in for a genuine offset of zero.
+    return fieldStartPos + ((storedOffset == DICT_OFFSET_ZERO_OFFSET) ? 0 : storedOffset);
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h
new file mode 100644
index 0000000..5912f65
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PT_READING_UTILS_H
+#define LATINIME_DYNAMIC_PT_READING_UTILS_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class DynamicPtReadingUtils {
+ public:
+    typedef uint8_t NodeFlags;
+
+    static const int DICT_OFFSET_INVALID;
+    static const int DICT_OFFSET_ZERO_OFFSET;
+
+    static int getForwardLinkPosition(const uint8_t *const buffer, const int pos);
+
+    static AK_FORCE_INLINE bool isValidForwardLinkPosition(const int forwardLinkAddress) {
+        return 0 != forwardLinkAddress;
+    }
+
+    static int getParentPtNodePosOffsetAndAdvancePosition(const uint8_t *const buffer,
+            int *const pos);
+
+    static int getParentPtNodePos(const int parentOffset, const int ptNodePos);
+
+    static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+    // Node flags: each predicate compares the MASK_MOVED bits of |flags| with one state value.
+    static AK_FORCE_INLINE bool isMoved(const NodeFlags flags) {
+        return (flags & MASK_MOVED) == FLAG_IS_MOVED;
+    }
+
+    static AK_FORCE_INLINE bool isDeleted(const NodeFlags flags) {
+        return (flags & MASK_MOVED) == FLAG_IS_DELETED;
+    }
+
+    static AK_FORCE_INLINE bool willBecomeNonTerminal(const NodeFlags flags) {
+        return (flags & MASK_MOVED) == FLAG_WILL_BECOME_NON_TERMINAL;
+    }
+
+    // Replaces the MASK_MOVED bits. Precedence: deleted > moved > willBecomeNonTerminal > not moved.
+    static AK_FORCE_INLINE NodeFlags updateAndGetFlags(const NodeFlags originalFlags,
+            const bool isMoved, const bool isDeleted, const bool willBecomeNonTerminal) {
+        NodeFlags state = FLAG_IS_NOT_MOVED;
+        if (isDeleted) {
+            state = FLAG_IS_DELETED;
+        } else if (isMoved) {
+            state = FLAG_IS_MOVED;
+        } else if (willBecomeNonTerminal) {
+            state = FLAG_WILL_BECOME_NON_TERMINAL;
+        }
+        return static_cast<NodeFlags>((originalFlags & (~MASK_MOVED)) | state);
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtReadingUtils);
+
+    static const NodeFlags MASK_MOVED;
+    static const NodeFlags FLAG_IS_NOT_MOVED;
+    static const NodeFlags FLAG_IS_MOVED;
+    static const NodeFlags FLAG_IS_DELETED;
+    static const NodeFlags FLAG_WILL_BECOME_NON_TERMINAL;
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PT_READING_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
new file mode 100644
index 0000000..9fc452c
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
@@ -0,0 +1,307 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+
+#include "third_party/android_prediction/suggest/core/dictionary/property/unigram_property.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Children position field size handed to PatriciaTrieReadingUtils::createAndGetFlags() when this
+// helper builds flags for PtNodes (presumably a size in bytes -- confirm against the flag reader).
+const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
+
+// Adds the word wordCodePoints[0..codePointCount) to the trie traversed by readingHelper, or
+// updates the unigram information of the PtNode that already spells that word.
+//
+// *outAddedNewUnigram is set to true on every path that creates a new terminal PtNode; on the
+// exact-match path it is set by setPtNodeProbability(). Returns the result of the helper that
+// performs the update, or false when readingHelper reports an error.
+// NOTE(review): wordCodePoints[matchedCodePointCount] is read without a bounds check against
+// codePointCount, so callers are presumably expected to pass codePointCount > 0 -- confirm.
+bool DynamicPtUpdatingHelper::addUnigramWord(
+ DynamicPtReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount,
+ const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
+ // Head position of the PtNode whose children are currently being scanned.
+ int parentPos = NOT_A_DICT_POS;
+ while (!readingHelper->isEnd()) {
+ const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ // Number of word code points already consumed by the ancestors of this PtNode.
+ const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
+ if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
+ wordCodePoints[matchedCodePointCount])) {
+ // The first code point is different from target code point. Skip this node and read
+ // the next sibling node.
+ readingHelper->readNextSiblingNode(ptNodeParams);
+ continue;
+ }
+ // Check following merged node code points.
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ const int nextIndex = matchedCodePointCount + j;
+ if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
+ wordCodePoints[matchedCodePointCount + j])) {
+ // The word ends inside this PtNode or diverges from it after j code points, so the
+ // PtNode has to be split at index j.
+ *outAddedNewUnigram = true;
+ return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty,
+ wordCodePoints + matchedCodePointCount,
+ codePointCount - matchedCodePointCount);
+ }
+ }
+ // All characters are matched.
+ if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
+ // This PtNode spells exactly the target word.
+ return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram);
+ }
+ if (!ptNodeParams.hasChildren()) {
+ // The word continues past a childless PtNode: the remaining code points become its
+ // first child.
+ *outAddedNewUnigram = true;
+ return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty,
+ wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
+ codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
+ }
+ // Advance to the children nodes.
+ parentPos = ptNodeParams.getHeadPos();
+ readingHelper->readChildNode(ptNodeParams);
+ }
+ if (readingHelper->isError()) {
+ // The dictionary is invalid.
+ return false;
+ }
+ // No sibling matched: write a new PtNode array holding the remaining code points and link it
+ // from the last forward link field that was read.
+ int pos = readingHelper->getPosOfLastForwardLinkField();
+ *outAddedNewUnigram = true;
+ return createAndInsertNodeIntoPtNodeArray(parentPos,
+ wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
+ codePointCount - readingHelper->getPrevTotalCodePointCount(),
+ unigramProperty, &pos);
+}
+
+// Adds an n-gram entry leading from the previous words to the word at wordPos.
+//
+// prevWordsPtNodePos holds the PtNode positions of the previous words and wordPos the PtNode
+// position of the target word; each position is converted to the terminal id stored in its PtNode
+// before the entry is handed to the PtNode writer. Returns false when prevWordsPtNodePos is empty
+// or holds more than MAX_PREV_WORD_COUNT_FOR_N_GRAM positions; otherwise returns the writer's
+// result. *outAddedNewEntry is filled in by the writer.
+bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
+        const int wordPos, const BigramProperty *const bigramProperty,
+        bool *const outAddedNewEntry) {
+    if (prevWordsPtNodePos.empty()) {
+        return false;
+    }
+    const size_t prevWordCount = prevWordsPtNodePos.size();
+    if (prevWordCount > static_cast<size_t>(MAX_PREV_WORD_COUNT_FOR_N_GRAM)) {
+        // ASSERT() is presumably a no-op outside debug builds, so the bound is also enforced at
+        // run time to keep the loop below from writing past prevWordTerminalIds.
+        ASSERT(false);
+        return false;
+    }
+    int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    for (size_t i = 0; i < prevWordCount; ++i) {
+        prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+                prevWordsPtNodePos[i]).getTerminalId();
+    }
+    const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordCount);
+    const int wordId =
+            mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
+    return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry);
+}
+
+// Removes the n-gram entry leading from the previous words to the word at wordPos.
+//
+// prevWordsPtNodePos holds the PtNode positions of the previous words and wordPos the PtNode
+// position of the target word; each position is converted to the terminal id stored in its PtNode
+// before the request is handed to the PtNode writer. Returns false when prevWordsPtNodePos is
+// empty or holds more than MAX_PREV_WORD_COUNT_FOR_N_GRAM positions; otherwise returns the
+// writer's result.
+bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
+        const int wordPos) {
+    if (prevWordsPtNodePos.empty()) {
+        return false;
+    }
+    const size_t prevWordCount = prevWordsPtNodePos.size();
+    if (prevWordCount > static_cast<size_t>(MAX_PREV_WORD_COUNT_FOR_N_GRAM)) {
+        // ASSERT() is presumably a no-op outside debug builds, so the bound is also enforced at
+        // run time to keep the loop below from writing past prevWordTerminalIds.
+        ASSERT(false);
+        return false;
+    }
+    int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    for (size_t i = 0; i < prevWordCount; ++i) {
+        prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+                prevWordsPtNodePos[i]).getTerminalId();
+    }
+    const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordCount);
+    const int wordId =
+            mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
+    return mPtNodeWriter->removeNgramEntry(prevWordIds, wordId);
+}
+
+// Attaches a shortcut target to the word whose PtNode starts at wordPos. The PtNode is read back
+// from the buffer and the actual update is delegated to the PtNode writer, whose result is
+// returned.
+bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
+        const int *const targetCodePoints, const int targetCodePointCount,
+        const int shortcutProbability) {
+    const PtNodeParams wordPtNodeParams(
+            mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos));
+    const bool added = mPtNodeWriter->addShortcutTarget(&wordPtNodeParams, targetCodePoints,
+            targetCodePointCount, shortcutProbability);
+    return added;
+}
+
+// Appends a new PtNode array containing a single terminal PtNode at the buffer tail and links it
+// from the forward link field at *forwardLinkFieldPos. The new array is only written when the
+// forward link was stored successfully.
+bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
+        const int *const nodeCodePoints, const int nodeCodePointCount,
+        const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) {
+    const int tailPos = mBuffer->getTailPosition();
+    const bool isLinked = DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+            mBuffer, tailPos, forwardLinkFieldPos);
+    return isLinked && createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints,
+            nodeCodePointCount, unigramProperty);
+}
+
+// Stores unigramProperty on the PtNode described by originalPtNodeParams.
+// When the PtNode is already a live terminal, only its unigram property is rewritten and
+// *outAddedNewUnigram is set to false. Otherwise a terminal copy of the PtNode is appended at the
+// buffer tail, the original is marked as moved to that copy, and *outAddedNewUnigram is set to
+// true. Returns false as soon as one of the writes fails.
+bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
+        const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
+    const bool isExistingWord =
+            originalPtNodeParams->isTerminal() && !originalPtNodeParams->isDeleted();
+    if (isExistingWord) {
+        // Overwrites the probability.
+        *outAddedNewUnigram = false;
+        return mPtNodeWriter->updatePtNodeUnigramProperty(originalPtNodeParams, unigramProperty);
+    }
+    // Make the node terminal and write the probability.
+    *outAddedNewUnigram = true;
+    const int newNodePos = mBuffer->getTailPosition();
+    int writingPos = newNodePos;
+    const PtNodeParams terminalPtNodeParams(getUpdatedPtNodeParams(originalPtNodeParams,
+            unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+            true /* isTerminal */, originalPtNodeParams->getParentPos(),
+            originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
+            unigramProperty->getProbability()));
+    return mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&terminalPtNodeParams,
+                    unigramProperty, &writingPos)
+            && mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, newNodePos, newNodePos);
+}
+
+// Gives a childless PtNode its first child: the parent's children position is pointed at the
+// buffer tail and, if that update succeeds, a PtNode array holding one terminal PtNode for
+// |codePoints| is written there.
+bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
+        const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty,
+        const int *const codePoints, const int codePointCount) {
+    const int childrenArrayPos = mBuffer->getTailPosition();
+    return mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, childrenArrayPos)
+            && createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
+                    codePointCount, unigramProperty);
+}
+
+// Writes, at the buffer tail, a complete PtNode array made of: an array size field of 1, one
+// terminal PtNode built from the given code points and unigramProperty, and a forward link set to
+// NOT_A_DICT_POS. Returns false as soon as one of the three writes fails.
+bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
+        const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
+        const UnigramProperty *const unigramProperty) {
+    int writingPos = mBuffer->getTailPosition();
+    const bool isSizeWritten = DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+            mBuffer, 1 /* arraySize */, &writingPos);
+    if (!isSizeWritten) {
+        return false;
+    }
+    const PtNodeParams newPtNodeParams(getPtNodeParamsForNewPtNode(
+            unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
+            parentPtNodePos, nodeCodePointCount, nodeCodePoints,
+            unigramProperty->getProbability()));
+    return mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&newPtNodeParams,
+                    unigramProperty, &writingPos)
+            && DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+                    NOT_A_DICT_POS /* forwardLinkPos */, &writingPos);
+}
+
+// Splits the PtNode described by reallocatingPtNodeParams after overlappingCodePointCount code
+// points and inserts the new word given by newNodeCodePoints / newNodeCodePointCount.
+// Everything is appended at the buffer tail in this order: the 1st part of the split PtNode, a
+// new children PtNode array holding the 2nd part (plus an extra terminal child when the new word
+// is longer than the overlap), and a forward link of NOT_A_DICT_POS. The original PtNode is then
+// marked as moved and the 1st part's children position is patched to the new array.
+// Returns whether the dictionary updating was succeeded or not.
+bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
+ const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
+ const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount) {
+ // When addsExtraChild is true, split the reallocating PtNode and add new child.
+ // Reallocating PtNode: abcde, newNode: abcxy.
+ // abc (1st, not terminal) __ de (2nd)
+ // \_ xy (extra child, terminal)
+ // Otherwise, this method makes 1st part terminal and write information in unigramProperty.
+ // Reallocating PtNode: abcde, newNode: abc.
+ // abc (1st, terminal) __ de (2nd)
+ const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
+ const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
+ int writingPos = firstPartOfReallocatedPtNodePos;
+ // Write the 1st part of the reallocating node. The children position will be updated later
+ // with actual children position.
+ if (addsExtraChild) {
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
+ reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
+ reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
+ return false;
+ }
+ } else {
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
+ overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(),
+ unigramProperty->getProbability()));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
+ unigramProperty, &writingPos)) {
+ return false;
+ }
+ }
+ // The children PtNode array starts right after the 1st part that was just written.
+ const int actualChildrenPos = writingPos;
+ // Create new children PtNode array.
+ const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
+ if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
+ newPtNodeCount, &writingPos)) {
+ return false;
+ }
+ // Write the 2nd part of the reallocating node.
+ // It keeps the original node's terminal state and probability, carries the code points after
+ // the overlap, and is parented to the 1st part.
+ const int secondPartOfReallocatedPtNodePos = writingPos;
+ const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
+ reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
+ reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
+ reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
+ reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
+ reallocatingPtNodeParams->getProbability()));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
+ return false;
+ }
+ if (addsExtraChild) {
+ // Write the remainder of the new word as a terminal sibling of the 2nd part.
+ const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ true /* isTerminal */, firstPartOfReallocatedPtNodePos,
+ newNodeCodePointCount - overlappingCodePointCount,
+ newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability()));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
+ unigramProperty, &writingPos)) {
+ return false;
+ }
+ }
+ // Terminate the new children PtNode array: no further array is linked after it.
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ // Update original reallocating PtNode as moved.
+ if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
+ secondPartOfReallocatedPtNodePos)) {
+ return false;
+ }
+ // Load node info. Information of the 1st part will be fetched.
+ const PtNodeParams ptNodeParams(
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
+ // Update children position.
+ return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos);
+}
+
+// Builds PtNodeParams derived from originalPtNodeParams with freshly computed flags and the given
+// parent position, code points and probability. The flags are always created without shortcut
+// targets and without bigrams.
+const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
+        const PtNodeParams *const originalPtNodeParams,
+        const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos,
+        const int codePointCount, const int *const codePoints, const int probability) const {
+    const bool hasMultipleChars = codePointCount > 1;
+    const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+            PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
+                    false /* hasShortcutTargets */, false /* hasBigrams */, hasMultipleChars,
+                    CHILDREN_POSITION_FIELD_SIZE);
+    return PtNodeParams(originalPtNodeParams, updatedFlags, parentPos, codePointCount, codePoints,
+            probability);
+}
+
+// Builds PtNodeParams for a brand-new PtNode from the given attributes. The flags are always
+// created without shortcut targets and without bigrams.
+const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(
+        const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
+        const int parentPos, const int codePointCount, const int *const codePoints,
+        const int probability) const {
+    const bool hasMultipleChars = codePointCount > 1;
+    const PatriciaTrieReadingUtils::NodeFlags newFlags =
+            PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
+                    false /* hasShortcutTargets */, false /* hasBigrams */, hasMultipleChars,
+                    CHILDREN_POSITION_FIELD_SIZE);
+    return PtNodeParams(newFlags, parentPos, codePointCount, codePoints, probability);
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
new file mode 100644
index 0000000..4be5828
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PT_UPDATING_HELPER_H
+#define LATINIME_DYNAMIC_PT_UPDATING_HELPER_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "third_party/android_prediction/utils/int_array_view.h"
+
+namespace latinime {
+
+class BigramProperty;
+class BufferWithExtendableBuffer;
+class DynamicPtReadingHelper;
+class PtNodeReader;
+class PtNodeWriter;
+class UnigramProperty;
+
+// Helper that performs updates (unigram words, n-gram entries, shortcut targets) on a dynamic
+// patricia trie. New data is appended through the BufferWithExtendableBuffer, existing PtNodes are
+// read through the PtNodeReader and modified through the PtNodeWriter. All three pointers are
+// stored as given; this class does not delete them.
+class DynamicPtUpdatingHelper {
+ public:
+ DynamicPtUpdatingHelper(BufferWithExtendableBuffer *const buffer,
+ const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter)
+ : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter) {}
+
+ ~DynamicPtUpdatingHelper() {}
+
+ // Add a word to the dictionary. If the word already exists, update the probability.
+ bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount,
+ const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
+
+ // Add an n-gram entry.
+ bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
+
+ // Remove an n-gram entry.
+ bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);
+
+ // Add a shortcut target.
+ bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,
+ const int targetCodePointCount, const int shortcutProbability);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper);
+
+ // Children position field size used when creating flags for PtNodes written by this helper.
+ static const int CHILDREN_POSITION_FIELD_SIZE;
+
+ BufferWithExtendableBuffer *const mBuffer;
+ const PtNodeReader *const mPtNodeReader;
+ PtNodeWriter *const mPtNodeWriter;
+
+ // Links a new single-node PtNode array from the forward link field at *forwardLinkFieldPos.
+ bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const UnigramProperty *const unigramProperty,
+ int *const forwardLinkFieldPos);
+
+ // Updates the unigram property of an existing terminal, or rewrites the PtNode as a terminal.
+ bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
+ const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
+
+ // Points the parent's children position at a new single-node PtNode array.
+ bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
+ const UnigramProperty *const unigramProperty, const int *const codePoints,
+ const int codePointCount);
+
+ // Writes a PtNode array containing exactly one terminal PtNode at the buffer tail.
+ bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const UnigramProperty *const unigramProperty);
+
+ // Splits an existing PtNode after overlappingCodePointCount code points to insert a new word.
+ bool reallocatePtNodeAndAddNewPtNodes(
+ const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
+ const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount);
+
+ // Builds PtNodeParams based on originalPtNodeParams with new flags and attributes.
+ const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
+ const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
+ const int parentPos, const int codePointCount,
+ const int *const codePoints, const int probability) const;
+
+ // Builds PtNodeParams for a PtNode that does not exist in the dictionary yet.
+ const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
+ const bool isTerminal, const int parentPos,
+ const int codePointCount, const int *const codePoints, const int probability) const;
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PT_UPDATING_HELPER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp
new file mode 100644
index 0000000..195bf2a
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Largest array size the small size field could hold; only referenced by the disabled small-field
+// code path in writePtNodeArraySizeAndAdvancePosition().
+const size_t DynamicPtWritingUtils::MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD = 0x7F;
+// Largest array size accepted by writePtNodeArraySizeAndAdvancePosition().
+const size_t DynamicPtWritingUtils::MAX_PTNODE_ARRAY_SIZE = 0x7FFF;
+// Sizes of the small and large array size fields (presumably in bytes -- confirm against
+// BufferWithExtendableBuffer::writeUintAndAdvancePosition()).
+const int DynamicPtWritingUtils::SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE = 1;
+const int DynamicPtWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE = 2;
+// Bit OR-ed into the array size when it is written with the large size field.
+const int DynamicPtWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG = 0x8000;
+// Size of an offset field written by writeDictOffset() (presumably in bytes -- confirm).
+const int DynamicPtWritingUtils::DICT_OFFSET_FIELD_SIZE = 3;
+// Inclusive range of offsets that writeDictOffset() accepts.
+const int DynamicPtWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF;
+const int DynamicPtWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF;
+// Bit OR-ed into the absolute value of a negative offset by writeDictOffset().
+const int DynamicPtWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
+// Size of the node flags field written by writeFlagsAndAdvancePosition() (presumably in bytes).
+const int DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
+
+// Writes an empty root PtNode array at rootPos: an array size of 0 followed by a forward link set
+// to NOT_A_DICT_POS. The forward link is only written when the size field was written.
+/* static */ bool DynamicPtWritingUtils::writeEmptyDictionary(
+        BufferWithExtendableBuffer *const buffer, const int rootPos) {
+    int pos = rootPos;
+    return writePtNodeArraySizeAndAdvancePosition(buffer, 0 /* arraySize */, &pos)
+            && writeForwardLinkPositionAndAdvancePosition(buffer,
+                    NOT_A_DICT_POS /* forwardLinkPos */, &pos);
+}
+
+// Writes forwardLinkPos into the forward link field at *forwardLinkFieldPos, encoded as an offset
+// relative to the field's own position, and advances *forwardLinkFieldPos.
+/* static */ bool DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+        BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
+        int *const forwardLinkFieldPos) {
+    const int fieldPos = *forwardLinkFieldPos;
+    return writeDictOffset(buffer, forwardLinkPos, fieldPos, forwardLinkFieldPos);
+}
+
+// Writes the size field of a PtNode array at *arraySizeFieldPos and advances the position.
+// Currently, every array size field is written with LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE (the size
+// combined with LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG), even for small arrays, to simplify the
+// updating process. Returns false without writing when arraySize exceeds MAX_PTNODE_ARRAY_SIZE.
+// TODO: Use SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE for small arrays.
+/* static */ bool DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+        BufferWithExtendableBuffer *const buffer, const size_t arraySize,
+        int *const arraySizeFieldPos) {
+    if (arraySize > MAX_PTNODE_ARRAY_SIZE) {
+        // arraySize is a size_t, so it is printed with the unsigned %zu conversion.
+        AKLOGI("PtNode array size cannot be written because arraySize is too large: %zu",
+                arraySize);
+        ASSERT(false);
+        return false;
+    }
+    // arraySize fits in 15 bits here, so narrowing it to uint32_t cannot lose information.
+    const uint32_t data =
+            static_cast<uint32_t>(arraySize) | LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG;
+    return buffer->writeUintAndAdvancePosition(data, LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE,
+            arraySizeFieldPos);
+}
+
+// Writes nodeFlags as a NODE_FLAG_FIELD_SIZE-sized unsigned value at *nodeFlagsFieldPos and
+// advances the position.
+/* static */ bool DynamicPtWritingUtils::writeFlagsAndAdvancePosition(
+        BufferWithExtendableBuffer *const buffer,
+        const DynamicPtReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos) {
+    const bool isWritten = buffer->writeUintAndAdvancePosition(nodeFlags, NODE_FLAG_FIELD_SIZE,
+            nodeFlagsFieldPos);
+    return isWritten;
+}
+
+// Writes the parent position at *parentPosFieldPos as an offset relative to basePos and advances
+// the position. Note that the stored parent offset is an offset from the node's head position,
+// so callers pass that head position as basePos.
+/* static */ bool DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+        BufferWithExtendableBuffer *const buffer, const int parentPos, const int basePos,
+        int *const parentPosFieldPos) {
+    const bool isWritten = writeDictOffset(buffer, parentPos, basePos, parentPosFieldPos);
+    return isWritten;
+}
+
+// Writes codePointCount code points at *codePointFieldPos and advances the position. The buffer is
+// told whether more than one code point is being written. A non-positive codePointCount is logged
+// and rejected with false.
+/* static */ bool DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(
+        BufferWithExtendableBuffer *const buffer, const int *const codePoints,
+        const int codePointCount, int *const codePointFieldPos) {
+    if (codePointCount > 0) {
+        return buffer->writeCodePointsAndAdvancePosition(codePoints, codePointCount,
+                codePointCount > 1 /* hasMultipleCodePoints */, codePointFieldPos);
+    }
+    AKLOGI("code points cannot be written because codePointCount is invalid: %d",
+            codePointCount);
+    ASSERT(false);
+    return false;
+}
+
+// Writes childrenPosition into the field at *childrenPositionFieldPos, encoded as an offset
+// relative to the field's own position, and advances *childrenPositionFieldPos.
+/* static */ bool DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(
+        BufferWithExtendableBuffer *const buffer, const int childrenPosition,
+        int *const childrenPositionFieldPos) {
+    const int fieldPos = *childrenPositionFieldPos;
+    return writeDictOffset(buffer, childrenPosition, fieldPos, childrenPositionFieldPos);
+}
+
+// Encodes targetPos as an offset relative to basePos and writes it as a DICT_OFFSET_FIELD_SIZE
+// field at *offsetFieldPos, advancing the position.
+//
+// Encoding: NOT_A_DICT_POS is stored as DICT_OFFSET_INVALID and a real offset of 0 as
+// DICT_OFFSET_ZERO_OFFSET; a non-negative offset is stored as is and a negative one as its
+// absolute value OR-ed with DICT_OFFSET_NEGATIVE_FLAG. Offsets outside
+// [MIN_DICT_OFFSET_VALUE, MAX_DICT_OFFSET_VALUE] are logged and rejected with false.
+/* static */ bool DynamicPtWritingUtils::writeDictOffset(BufferWithExtendableBuffer *const buffer,
+        const int targetPos, const int basePos, int *const offsetFieldPos) {
+    int offset = DynamicPtReadingUtils::DICT_OFFSET_INVALID;
+    if (targetPos != NOT_A_DICT_POS) {
+        // Only subtract for real positions: NOT_A_DICT_POS is a sentinel (presumably an extreme
+        // int value -- confirm in defines.h), and subtracting basePos from it could overflow.
+        offset = targetPos - basePos;
+        if (offset == 0) {
+            offset = DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+        }
+    }
+    if (offset > MAX_DICT_OFFSET_VALUE || offset < MIN_DICT_OFFSET_VALUE) {
+        AKLOGI("offset cannot be written because the offset is too large or too small: %d",
+                offset);
+        ASSERT(false);
+        return false;
+    }
+    uint32_t data = 0;
+    if (offset >= 0) {
+        data = offset;
+    } else {
+        data = abs(offset) | DICT_OFFSET_NEGATIVE_FLAG;
+    }
+    return buffer->writeUintAndAdvancePosition(data, DICT_OFFSET_FIELD_SIZE, offsetFieldPos);
+}
+}
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h
new file mode 100644
index 0000000..f60266c
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PT_WRITING_UTILS_H
+#define LATINIME_DYNAMIC_PT_WRITING_UTILS_H
+
+#include <cstddef>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+// Static helpers that write the individual fields of dynamic patricia trie PtNodes and PtNode
+// arrays into a BufferWithExtendableBuffer. The *AndAdvancePosition() methods take the writing
+// position by pointer and hand it on to the buffer writer so that it is advanced past the field.
+class DynamicPtWritingUtils {
+ public:
+ // Size of the node flags field.
+ static const int NODE_FLAG_FIELD_SIZE;
+
+ // Writes an empty PtNode array (size 0, no forward link) at rootPos.
+ static bool writeEmptyDictionary(BufferWithExtendableBuffer *const buffer, const int rootPos);
+
+ // Writes forwardLinkPos as an offset relative to the forward link field itself.
+ static bool writeForwardLinkPositionAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
+ int *const forwardLinkFieldPos);
+
+ // Writes the PtNode array size field; fails when arraySize is too large to be stored.
+ static bool writePtNodeArraySizeAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const size_t arraySize, int *const arraySizeFieldPos);
+
+ // Writes nodeFlags at nodeFlagsFieldPos; the position is taken by value, so the caller's
+ // position is left untouched.
+ static bool writeFlags(BufferWithExtendableBuffer *const buffer,
+ const DynamicPtReadingUtils::NodeFlags nodeFlags,
+ const int nodeFlagsFieldPos) {
+ int writingPos = nodeFlagsFieldPos;
+ return writeFlagsAndAdvancePosition(buffer, nodeFlags, &writingPos);
+ }
+
+ // Writes nodeFlags at *nodeFlagsFieldPos.
+ static bool writeFlagsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const DynamicPtReadingUtils::NodeFlags nodeFlags,
+ int *const nodeFlagsFieldPos);
+
+ // Writes parentPosition as an offset relative to basePos.
+ static bool writeParentPosOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const int parentPosition, const int basePos, int *const parentPosFieldPos);
+
+ // Writes codePointCount code points; fails when codePointCount is not positive.
+ static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const int *const codePoints, const int codePointCount, int *const codePointFieldPos);
+
+ // Writes childrenPosition as an offset relative to the children position field itself.
+ static bool writeChildrenPositionAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const int childrenPosition, int *const childrenPositionFieldPos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtWritingUtils);
+
+ static const size_t MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD;
+ static const size_t MAX_PTNODE_ARRAY_SIZE;
+ static const int SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE;
+ static const int LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE;
+ static const int LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG;
+ static const int DICT_OFFSET_FIELD_SIZE;
+ static const int MAX_DICT_OFFSET_VALUE;
+ static const int MIN_DICT_OFFSET_VALUE;
+ static const int DICT_OFFSET_NEGATIVE_FLAG;
+
+ // Shared encoder for all offset fields: writes targetPos as an offset relative to basePos.
+ static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos,
+ const int basePos, int *const offsetFieldPos);
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PT_WRITING_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
new file mode 100644
index 0000000..cb11985
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+ typedef PatriciaTrieReadingUtils PtReadingUtils;
+ // The two highest bits of the flags byte select the width of the children position field.
+ const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE = 0xC0;
+ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00;
+ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40;
+ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80;
+ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0;
+
+ // Set when the PtNode's code points are read as a string instead of a single code point.
+ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20;
+ // Set for terminal PtNodes; readPtNodeInfo() reads a probability only when this is set.
+ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10;
+ // Set when shortcut targets are present; readPtNodeInfo() then skips over them.
+ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08;
+ // Set when bigrams are present; readPtNodeInfo() then skips over them.
+ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04;
+ // Flag for non-words (typically, shortcut only entries)
+ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02;
+ // Flag for blacklisted entries
+ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
+
+ /* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+         const uint8_t *const buffer, int *const pos) {
+     // A lead byte below 0x80 is the size itself; otherwise it starts a two-byte size.
+     const uint8_t leadByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+     if (leadByte < 0x80) {
+         return leadByte;
+     }
+     const int lowByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+     return ((leadByte & 0x7F) << 8) ^ lowByte;
+ }
+
+ /* static */ PtReadingUtils::NodeFlags PtReadingUtils::getFlagsAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); // Flags are one byte.
+ }
+
+ /* static */ int PtReadingUtils::getCodePointAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos) {
+ return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos); // Pure delegation.
+ }
+
+ // Reads the code point(s) of a PtNode into outBuffer and returns how many were stored.
+ /* static */ int PtReadingUtils::getCharsAndAdvancePosition(const uint8_t *const buffer,
+         const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) {
+     if (hasMultipleChars(flags)) {
+         // Several code points: hand the whole job to the string reader.
+         return ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, pos);
+     }
+     // Otherwise the PtNode carries one code point, which is always consumed from the buffer.
+     const int singleCodePoint = getCodePointAndAdvancePosition(buffer, pos);
+     if (singleCodePoint == NOT_A_CODE_POINT) {
+         // CAVEAT: NOT_A_CODE_POINT here means the stored code point is
+         // CHARACTER_ARRAY_TERMINATOR, which must never be the only code point of a PtNode.
+         AKLOGE("codePoint is NOT_A_CODE_POINT. pos: %d, codePoint: 0x%x, buffer[pos - 1]: 0x%x",
+                 *pos - 1, singleCodePoint, buffer[*pos - 1]);
+         ASSERT(false);
+         return 0;
+     }
+     if (maxLength <= 0) {
+         // The caller gave no room in outBuffer, so nothing is stored.
+         return 0;
+     }
+     outBuffer[0] = singleCodePoint;
+     return 1;
+ }
+
+ // Moves *pos past the code point(s) of a PtNode and returns how many were skipped.
+ /* static */ int PtReadingUtils::skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
+         const int maxLength, int *const pos) {
+     if (hasMultipleChars(flags)) {
+         return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos);
+     }
+     if (maxLength <= 0) {
+         // Nothing may be skipped, so the position is left untouched.
+         return 0;
+     }
+     // Single code point: read it only for the side effect of advancing *pos.
+     getCodePointAndAdvancePosition(buffer, pos);
+     return 1;
+ }
+
+ /* static */ int PtReadingUtils::readProbabilityAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); // Stored as one byte.
+ }
+
+ /* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition(
+         const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
+     // The value stored in the field is relative to the position of the field itself, so
+     // remember where the field starts before any bytes are consumed.
+     const int fieldStartPos = *pos;
+     const int positionType = MASK_CHILDREN_POSITION_TYPE & flags;
+     int relativeOffset = 0;
+     // The flags say whether the field is one, two or three bytes wide.
+     if (positionType == FLAG_CHILDREN_POSITION_TYPE_ONEBYTE) {
+         relativeOffset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+     } else if (positionType == FLAG_CHILDREN_POSITION_TYPE_TWOBYTES) {
+         relativeOffset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
+     } else if (positionType == FLAG_CHILDREN_POSITION_TYPE_THREEBYTES) {
+         relativeOffset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
+     } else {
+         // No children position is stored: the caller asked for the children of a word
+         // that has none.
+         return NOT_A_DICT_POS;
+     }
+     return fieldStartPos + relativeOffset;
+ }
+
+ /* static */ void PtReadingUtils::readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
+         const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+         const DictionaryBigramsStructurePolicy *const bigramPolicy,
+         NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
+         int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
+         int *const outBigramPos, int *const outSiblingPos) {
+     int cursor = ptNodePos;  // Read head; every helper below advances it.
+     const NodeFlags nodeFlags = getFlagsAndAdvancePosition(dictBuf, &cursor);
+     *outFlags = nodeFlags;
+     *outCodePointCount = getCharsAndAdvancePosition(
+             dictBuf, nodeFlags, MAX_WORD_LENGTH, outCodePoint, &cursor);
+     *outProbability = !isTerminal(nodeFlags) ? NOT_A_PROBABILITY
+             : readProbabilityAndAdvancePosition(dictBuf, &cursor);
+     *outChildrenPos = !hasChildrenInFlags(nodeFlags) ? NOT_A_DICT_POS
+             : readChildrenPositionAndAdvancePosition(dictBuf, nodeFlags, &cursor);
+     const bool withShortcuts = hasShortcutTargets(nodeFlags);
+     *outShortcutPos = withShortcuts ? cursor : NOT_A_DICT_POS;
+     if (withShortcuts) {
+         shortcutPolicy->skipAllShortcuts(&cursor);
+     }
+     const bool withBigrams = hasBigrams(nodeFlags);
+     *outBigramPos = withBigrams ? cursor : NOT_A_DICT_POS;
+     if (withBigrams) {
+         bigramPolicy->skipAllBigrams(&cursor);
+     }
+     *outSiblingPos = cursor;  // The position right after the last field read above.
+ }
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
new file mode 100644
index 0000000..61cd768
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_PATRICIA_TRIE_READING_UTILS_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class DictionaryShortcutsStructurePolicy;
+class DictionaryBigramsStructurePolicy;
+
+ class PatriciaTrieReadingUtils {
+  public:
+     using NodeFlags = uint8_t;
+
+     static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+     static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+     static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+     // Stores the code point(s) of a PtNode in outBuffer; the return value is their count.
+     static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags,
+             const int maxLength, int *const outBuffer, int *const pos);
+
+     // Moves pos past the code point(s) of a PtNode; the return value is the skipped count.
+     static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
+             const int maxLength, int *const pos);
+
+     static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+     static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
+             const NodeFlags flags, int *const pos);
+
+     /**
+      * Node flag predicates: each one tests a single bit (or bit group) of the flags byte.
+      */
+     static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) {
+         return 0 != (flags & FLAG_IS_BLACKLISTED);
+     }
+
+     static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
+         return 0 != (flags & FLAG_IS_NOT_A_WORD);
+     }
+
+     static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) {
+         return 0 != (flags & FLAG_IS_TERMINAL);
+     }
+
+     static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) {
+         return 0 != (flags & FLAG_HAS_SHORTCUT_TARGETS);
+     }
+
+     static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) {
+         return 0 != (flags & FLAG_HAS_BIGRAMS);
+     }
+
+     static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) {
+         return 0 != (flags & FLAG_HAS_MULTIPLE_CHARS);
+     }
+
+     static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
+         return (MASK_CHILDREN_POSITION_TYPE & flags) != FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
+     }
+
+     static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isBlacklisted,
+             const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets,
+             const bool hasBigrams, const bool hasMultipleChars,
+             const int childrenPositionFieldSize) {
+         NodeFlags nodeFlags = 0;
+         // One independent bit per boolean attribute.
+         if (isBlacklisted) { nodeFlags |= FLAG_IS_BLACKLISTED; }
+         if (isNotAWord) { nodeFlags |= FLAG_IS_NOT_A_WORD; }
+         if (isTerminal) { nodeFlags |= FLAG_IS_TERMINAL; }
+         if (hasShortcutTargets) { nodeFlags |= FLAG_HAS_SHORTCUT_TARGETS; }
+         if (hasBigrams) { nodeFlags |= FLAG_HAS_BIGRAMS; }
+         if (hasMultipleChars) { nodeFlags |= FLAG_HAS_MULTIPLE_CHARS; }
+         // The children position type records the field width; any width other than 1, 2 or
+         // 3 bytes is recorded as "no position".
+         switch (childrenPositionFieldSize) {
+             case 1: nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE; break;
+             case 2: nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES; break;
+             case 3: nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES; break;
+             default: nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION; break;
+         }
+         return nodeFlags;
+     }
+
+     static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
+             const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+             const DictionaryBigramsStructurePolicy *const bigramPolicy,
+             NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
+             int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
+             int *const outBigramPos, int *const outSiblingPos);
+
+  private:
+     DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
+
+     static const NodeFlags MASK_CHILDREN_POSITION_TYPE;
+     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
+     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
+     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
+     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
+
+     static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
+     static const NodeFlags FLAG_IS_TERMINAL;
+     static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
+     static const NodeFlags FLAG_HAS_BIGRAMS;
+     static const NodeFlags FLAG_IS_NOT_A_WORD;
+     static const NodeFlags FLAG_IS_BLACKLISTED;
+ };
+} // namespace latinime
+ #endif /* LATINIME_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h
new file mode 100644
index 0000000..6dfe685
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_ARRAY_READER_H
+#define LATINIME_PT_NODE_ARRAY_READER_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+ // Interface class used to read PtNode array information.
+ class PtNodeArrayReader {
+  public:
+     virtual ~PtNodeArrayReader() {}
+
+     // Returns whether ptNodeArrayPos is valid; the array info is reported via the out params.
+     virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+             int *const outPtNodeCount, int *const outFirstPtNodePos) const = 0;
+
+     // Returns whether forwardLinkPos is valid. NOT_A_DICT_POS is set to outNextPtNodeArrayPos
+     // when the next array doesn't exist.
+     virtual bool readForwardLinkAndReturnIfValid(const int forwardLinkPos,
+             int *const outNextPtNodeArrayPos) const = 0;
+
+  protected:
+     PtNodeArrayReader() {}
+
+  private:
+     DISALLOW_COPY_AND_ASSIGN(PtNodeArrayReader);
+ };
+} // namespace latinime
+ #endif /* LATINIME_PT_NODE_ARRAY_READER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
new file mode 100644
index 0000000..fd29c7c
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_PARAMS_H
+#define LATINIME_PT_NODE_PARAMS_H
+
+#include <cstring>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+namespace latinime {
+
+ // Immutable holder of one PtNode's parameters; stores at most MAX_WORD_LENGTH code points.
+ class PtNodeParams {
+  public:
+     // Constructs an invalid PtNode: the code point count is 0, so isValid() returns false.
+     PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mHasMovedFlag(false),
+             mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mCodePoints(),
+             mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+             mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
+             mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
+             mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
+             mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {}
+
+     PtNodeParams(const PtNodeParams& ptNodeParams)
+             : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
+               mHasMovedFlag(ptNodeParams.mHasMovedFlag), mParentPos(ptNodeParams.mParentPos),
+               mCodePointCount(ptNodeParams.mCodePointCount), mCodePoints(),
+               mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
+               mTerminalId(ptNodeParams.mTerminalId),
+               mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
+               mProbability(ptNodeParams.mProbability),
+               mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos),
+               mChildrenPos(ptNodeParams.mChildrenPos),
+               mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos),
+               mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos),
+               mSiblingPos(ptNodeParams.mSiblingPos) {
+         memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
+     }
+
+     // PtNode read from version 2 dictionary. codePointCount must not exceed MAX_WORD_LENGTH.
+     PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
+             const int codePointCount, const int *const codePoints, const int probability,
+             const int childrenPos, const int shortcutPos, const int bigramPos,
+             const int siblingPos)
+             : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(false), mParentPos(NOT_A_DICT_POS),
+               mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
+               mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+               mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos),
+               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos),
+               mBigramPos(bigramPos), mSiblingPos(siblingPos) {
+         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+     }
+
+     // PtNode with a terminal id; the shortcut and bigram positions are set to the terminal id.
+     PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
+             const int parentPos, const int codePointCount, const int *const codePoints,
+             const int terminalIdFieldPos, const int terminalId, const int probability,
+             const int childrenPosFieldPos, const int childrenPos, const int siblingPos)
+             : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
+               mCodePointCount(codePointCount), mCodePoints(),
+               mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
+               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+               mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
+               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(terminalId),
+               mBigramPos(terminalId), mSiblingPos(siblingPos) {
+         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+     }
+
+     // Construct new params by updating existing PtNode params; positions come from the source.
+     PtNodeParams(const PtNodeParams *const ptNodeParams,
+             const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
+             const int codePointCount, const int *const codePoints, const int probability)
+             : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true),
+               mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(),
+               mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
+               mTerminalId(ptNodeParams->getTerminalId()),
+               mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
+               mProbability(probability),
+               mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()),
+               mChildrenPos(ptNodeParams->getChildrenPos()),
+               mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()),
+               mShortcutPos(ptNodeParams->getShortcutPos()),
+               mBigramPos(ptNodeParams->getBigramsPos()),
+               mSiblingPos(ptNodeParams->getSiblingNodePos()) {
+         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+     }
+
+     PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
+             const int codePointCount, const int *const codePoints, const int probability)
+             : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
+               mCodePointCount(codePointCount), mCodePoints(),
+               mTerminalIdFieldPos(NOT_A_DICT_POS),
+               mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+               mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
+               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
+               mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {
+         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+     }
+
+     AK_FORCE_INLINE bool isValid() const {
+         return mCodePointCount > 0;
+     }
+
+     // Head position of the PtNode
+     AK_FORCE_INLINE int getHeadPos() const {
+         return mHeadPos;
+     }
+
+     // Flags. The "deleted" and "will become non-terminal" bits count only with mHasMovedFlag.
+     AK_FORCE_INLINE bool isDeleted() const {
+         return mHasMovedFlag && DynamicPtReadingUtils::isDeleted(mFlags);
+     }
+
+     AK_FORCE_INLINE bool willBecomeNonTerminal() const {
+         return mHasMovedFlag && DynamicPtReadingUtils::willBecomeNonTerminal(mFlags);
+     }
+
+     AK_FORCE_INLINE bool hasChildren() const {
+         return mChildrenPos != NOT_A_DICT_POS;
+     }
+
+     AK_FORCE_INLINE bool isTerminal() const {
+         return PatriciaTrieReadingUtils::isTerminal(mFlags);
+     }
+
+     AK_FORCE_INLINE bool isBlacklisted() const {
+         return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
+     }
+
+     AK_FORCE_INLINE bool isNotAWord() const {
+         return PatriciaTrieReadingUtils::isNotAWord(mFlags);
+     }
+
+     AK_FORCE_INLINE bool hasBigrams() const {
+         return PatriciaTrieReadingUtils::hasBigrams(mFlags);
+     }
+
+     AK_FORCE_INLINE bool hasShortcutTargets() const {
+         return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags);
+     }
+
+     AK_FORCE_INLINE bool representsNonWordInfo() const {
+         return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
+                 && isNotAWord();
+     }
+
+     AK_FORCE_INLINE bool representsBeginningOfSentence() const {
+         return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
+                 && isNotAWord();
+     }
+
+     // Parent node position
+     AK_FORCE_INLINE int getParentPos() const {
+         return mParentPos;
+     }
+
+     // Number of code points
+     AK_FORCE_INLINE uint8_t getCodePointCount() const {
+         return mCodePointCount;
+     }
+
+     AK_FORCE_INLINE const int *getCodePoints() const {
+         return mCodePoints;
+     }
+
+     // Terminal id
+     AK_FORCE_INLINE int getTerminalIdFieldPos() const {
+         return mTerminalIdFieldPos;
+     }
+
+     AK_FORCE_INLINE int getTerminalId() const {
+         return mTerminalId;
+     }
+
+     // Probability
+     AK_FORCE_INLINE int getProbabilityFieldPos() const {
+         return mProbabilityFieldPos;
+     }
+
+     AK_FORCE_INLINE int getProbability() const {
+         return mProbability;
+     }
+
+     // Children PtNode array position
+     AK_FORCE_INLINE int getChildrenPosFieldPos() const {
+         return mChildrenPosFieldPos;
+     }
+
+     AK_FORCE_INLINE int getChildrenPos() const {
+         return mChildrenPos;
+     }
+
+     // Bigram linked node position.
+     AK_FORCE_INLINE int getBigramLinkedNodePos() const {
+         return mBigramLinkedNodePos;
+     }
+
+     // Shortcut list position
+     AK_FORCE_INLINE int getShortcutPos() const {
+         return mShortcutPos;
+     }
+
+     // Bigrams position
+     AK_FORCE_INLINE int getBigramsPos() const {
+         return mBigramPos;
+     }
+
+     // Sibling node position
+     AK_FORCE_INLINE int getSiblingNodePos() const {
+         return mSiblingPos;
+     }
+
+  private:
+     // This class has a public copy constructor so that it can be used as a return value.
+     DISALLOW_ASSIGNMENT_OPERATOR(PtNodeParams);
+
+     const int mHeadPos;
+     const PatriciaTrieReadingUtils::NodeFlags mFlags;
+     const bool mHasMovedFlag;
+     const int mParentPos;
+     const uint8_t mCodePointCount;
+     int mCodePoints[MAX_WORD_LENGTH];
+     const int mTerminalIdFieldPos;
+     const int mTerminalId;
+     const int mProbabilityFieldPos;
+     const int mProbability;
+     const int mChildrenPosFieldPos;
+     const int mChildrenPos;
+     const int mBigramLinkedNodePos;
+     const int mShortcutPos;
+     const int mBigramPos;
+     const int mSiblingPos;
+ };
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_PARAMS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
new file mode 100644
index 0000000..3107834
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_READER_H
+#define LATINIME_PT_NODE_READER_H
+
+#include "third_party/android_prediction/defines.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+
+namespace latinime {
+
+ // Pure-virtual interface through which PtNode information is read.
+ class PtNodeReader {
+  public:
+     virtual ~PtNodeReader() {}
+     virtual const PtNodeParams
+             fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const = 0;
+
+  protected:
+     PtNodeReader() {}
+
+  private:
+     DISALLOW_COPY_AND_ASSIGN(PtNodeReader);
+ };
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_READER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
new file mode 100644
index 0000000..f1dd8d6
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_WRITER_H
+#define LATINIME_PT_NODE_WRITER_H
+
+#include <unordered_map>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "third_party/android_prediction/utils/int_array_view.h"
+
+namespace latinime {
+
+class BigramProperty;
+class UnigramProperty;
+
+ // Pure-virtual interface through which PtNode information is written.
+ class PtNodeWriter {
+  public:
+     using PtNodeArrayPositionRelocationMap = std::unordered_map<int, int>;
+     using PtNodePositionRelocationMap = std::unordered_map<int, int>;
+     struct DictPositionRelocationMap {
+         // Groups the PtNode array map and the PtNode map; both start out empty.
+         DictPositionRelocationMap()
+                 : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
+
+         PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
+         PtNodePositionRelocationMap mPtNodePositionRelocationMap;
+
+      private:
+         DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
+     };
+
+     virtual ~PtNodeWriter() {}
+
+     virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
+
+     virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
+             const int movedPos, const int bigramLinkedNodePos) = 0;
+
+     virtual bool markPtNodeAsWillBecomeNonTerminal(
+             const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
+
+     virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
+             const UnigramProperty *const unigramProperty) = 0;
+
+     virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+             const PtNodeParams *const toBeUpdatedPtNodeParams,
+             bool *const outNeedsToKeepPtNode) = 0;
+
+     virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
+             const int newChildrenPosition) = 0;
+
+     virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+             int *const ptNodeWritingPos) = 0;
+
+     virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+             const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
+
+     virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+             const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0;
+
+     virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0;
+
+     virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
+             const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0;
+
+     virtual bool updateAllPositionFields(
+             const PtNodeParams *const toBeUpdatedPtNodeParams,
+             const DictPositionRelocationMap *const dictPositionRelocationMap,
+             int *const outBigramEntryCount) = 0;
+
+     virtual bool addShortcutTarget(
+             const PtNodeParams *const ptNodeParams, const int *const targetCodePoints,
+             const int targetCodePointCount, const int shortcutProbability) = 0;
+
+  protected:
+     PtNodeWriter() {}
+
+  private:
+     DISALLOW_COPY_AND_ASSIGN(PtNodeWriter);
+ };
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_WRITER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
new file mode 100644
index 0000000..9395877
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+ // Highest bit of the flags byte: flag for presence of more attributes (tested by hasNext()).
+ const ShortcutListReadingUtils::ShortcutFlags
+ ShortcutListReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
+ // Mask for attribute probability, stored on the low 4 bits of the flags byte.
+ const ShortcutListReadingUtils::ShortcutFlags
+ ShortcutListReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+ const int ShortcutListReadingUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2; // Read as a uint16.
+ // The shortcut probability that means 'whitelist'; 15 is the largest value the mask allows.
+ const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
+
+ /* static */ ShortcutListReadingUtils::ShortcutFlags
+ ShortcutListReadingUtils::getFlagsAndForwardPointer(const uint8_t *const dictRoot,
+ int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); // Flags are one byte.
+ }
+
+ /* static */ int ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(
+         const uint8_t *const dictRoot, int *const pos) {
+     // The stored uint16 is the region size *including* the size field itself; strip that part.
+     const int sizeWithField = ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos);
+     return sizeWithField - SHORTCUT_LIST_SIZE_FIELD_SIZE;
+ }
+
+ /* static */ int ShortcutListReadingUtils::readShortcutTarget(
+ const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) {
+ return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos); // Pure delegation.
+ }
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h
new file mode 100644
index 0000000..58e4890
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_LIST_READING_UTILS_H
+#define LATINIME_SHORTCUT_LIST_READING_UTILS_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class ShortcutListReadingUtils {
+ public:
+    typedef uint8_t ShortcutFlags;
+    // Reads the flags byte of the shortcut entry at *pos and advances *pos past it.
+    static ShortcutFlags getFlagsAndForwardPointer(const uint8_t *const dictRoot, int *const pos);
+    // Extracts the probability bits from a flags byte.
+    static AK_FORCE_INLINE int getProbabilityFromFlags(const ShortcutFlags flags) {
+        return flags & MASK_ATTRIBUTE_PROBABILITY;
+    }
+    // Returns true when the has-next bit of the flags byte is set.
+    static AK_FORCE_INLINE bool hasNext(const ShortcutFlags flags) {
+        return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
+    }
+
+    // Reads the list size field at *pos, advances *pos past it, and returns the size of the
+    // shortcut list region excluding that size field.
+    static int getShortcutListSizeAndForwardPointer(const uint8_t *const dictRoot, int *const pos);
+    // Returns the size of the list size field itself.
+    static AK_FORCE_INLINE int getShortcutListSizeFieldSize() {
+        return SHORTCUT_LIST_SIZE_FIELD_SIZE;
+    }
+    // Moves *pos past the whole shortcut list (size field and entries) that starts at *pos.
+    static AK_FORCE_INLINE void skipShortcuts(const uint8_t *const dictRoot, int *const pos) {
+        const int shortcutListSize = getShortcutListSizeAndForwardPointer(dictRoot, pos);
+        *pos += shortcutListSize;  // *pos was already moved past the size field by the call.
+    }
+    // Returns true when the probability bits equal the whitelist marker value.
+    static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
+        return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY;
+    }
+    // Reads one shortcut target into outWord (at most maxLength is passed on) and advances *pos.
+    static int readShortcutTarget(const uint8_t *const dictRoot, const int maxLength,
+            int *const outWord, int *const pos);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListReadingUtils);
+    // Format constants of the shortcut list.
+    static const ShortcutFlags FLAG_ATTRIBUTE_HAS_NEXT;
+    static const ShortcutFlags MASK_ATTRIBUTE_PROBABILITY;
+    static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
+    static const int WHITELIST_SHORTCUT_PROBABILITY;
+};
+} // namespace latinime
+#endif // LATINIME_SHORTCUT_LIST_READING_UTILS_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h
new file mode 100644
index 0000000..7e03322
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_LIST_POLICY_H
+#define LATINIME_BIGRAM_LIST_POLICY_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
+
+namespace latinime {
+
+class BigramListPolicy : public DictionaryBigramsStructurePolicy {  // Read-only bigram lists.
+ public:
+    BigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize)
+            : mBigramsBuf(bigramsBuf), mBufSize(bufSize) {}
+
+    ~BigramListPolicy() {}
+    // Reads the bigram entry at *pos; on failure outputs NOT_A_PROBABILITY and no next entry.
+    void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
+            int *const pos) const {
+        BigramListReadWriteUtils::BigramFlags flags;
+        if (!BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf,
+                mBufSize, &flags, outBigramPos, pos)) {
+            AKLOGE("Cannot read bigram entry. mBufSize: %d, pos: %d. ", mBufSize, *pos);
+            *outProbability = NOT_A_PROBABILITY;
+            *outHasNext = false;  // Stops callers that iterate while a next entry is reported.
+            return;
+        }
+        *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
+        *outHasNext = BigramListReadWriteUtils::hasNext(flags);
+    }
+    // Forwards to skipExistingBigrams() for the list at *pos and returns its result.
+    bool skipAllBigrams(int *const pos) const {
+        return BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, mBufSize, pos);
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy);
+    // Buffer holding the bigram lists, and its size. Neither is released by this class.
+    const uint8_t *const mBigramsBuf;
+    const int mBufSize;
+};
+} // namespace latinime
+#endif // LATINIME_BIGRAM_LIST_POLICY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
new file mode 100644
index 0000000..f156b8f
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -0,0 +1,470 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_vector.h"
+#include "third_party/android_prediction/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
+#include "third_party/android_prediction/suggest/core/dictionary/ngram_listener.h"
+#include "third_party/android_prediction/suggest/core/session/prev_words_info.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/probability_utils.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+namespace latinime {
+
+void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
+        DicNodeVector *const childDicNodes) const {  // Collects dicNode's child PtNodes.
+    if (!dicNode->hasChildren()) {
+        return;
+    }
+    int nextPos = dicNode->getChildrenPtNodeArrayPos();
+    if (nextPos < 0 || nextPos >= mDictBufferSize) {  // The array must start inside the buffer.
+        AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
+                nextPos, mDictBufferSize);
+        mIsCorrupted = true;
+        ASSERT(false);
+        return;
+    }
+    const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+            mDictRoot, &nextPos);  // nextPos is advanced past the array size field.
+    for (int i = 0; i < childCount; i++) {
+        if (nextPos < 0 || nextPos >= mDictBufferSize) {  // Re-check before every child read.
+            AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d",
+                    nextPos, mDictBufferSize, i, childCount);
+            mIsCorrupted = true;
+            ASSERT(false);
+            return;
+        }
+        nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes);  // Next sibling.
+    }
+}
+
+// This retrieves code points and the probability of the word by its terminal position.
+// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
+// it is possible to check for this with advantageous complexity. For each PtNode array, we search
+// for PtNodes with children and compare the children position with the position we look for.
+// When we overshoot the position we look for, it means the word we look for is in the children
+// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
+// PtNode array with the last PtNode's children position still less than what we are searching for,
+// we must descend the last PtNode's children (for example, if the word we are searching for starts
+// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
+// than the position we look for, and we have to descend the z PtNode).
+/* Parameters :
+ * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
+ *  what is stored as the "bigram position" in each bigram)
+ * outCodePoints: an array to write the found word, with room for maxCodePointCount code points.
+ * outUnigramProbability: a pointer to an int to write the probability into.
+ * Return value : the code point count, or 0 if the word was not found.
+ */
+// TODO: Split this function to be more readable
+int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
+        const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+        int *const outUnigramProbability) const {
+    int pos = getRootPosition();
+    int wordPos = 0;
+    // One iteration of the outer loop walks one PtNode array. As stated above, we only descend
+    // PtNodes that are on the path to the terminal we are searching, so each iteration is one
+    // depth level further than the last. Iterations are counted, and wordPos is checked against
+    // maxCodePointCount, so that a broken dictionary can neither loop forever nor make us write
+    // past the end of outCodePoints.
+    for (int loopCount = maxCodePointCount; loopCount > 0 && wordPos < maxCodePointCount;
+            --loopCount) {
+        int lastCandidatePtNodePos = 0;
+        // Let's loop through PtNodes in this PtNode array searching for either the terminal
+        // or one of its ancestors.
+        if (pos < 0 || pos >= mDictBufferSize) {
+            AKLOGE("PtNode array position is invalid. pos: %d, dict size: %d",
+                    pos, mDictBufferSize);
+            mIsCorrupted = true;
+            ASSERT(false);
+            *outUnigramProbability = NOT_A_PROBABILITY;
+            return 0;
+        }
+        for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+                mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) {
+            const int startPos = pos;
+            if (pos < 0 || pos >= mDictBufferSize) {
+                AKLOGE("PtNode position is invalid. pos: %d, dict size: %d", pos, mDictBufferSize);
+                mIsCorrupted = true;
+                ASSERT(false);
+                *outUnigramProbability = NOT_A_PROBABILITY;
+                return 0;
+            }
+            const PatriciaTrieReadingUtils::NodeFlags flags =
+                    PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+            const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+                    mDictRoot, &pos);
+            if (ptNodePos == startPos) {
+                // We found the position. Copy the rest of the code points in the buffer and return
+                // the length.
+                outCodePoints[wordPos] = character;
+                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
+                    int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+                            mDictRoot, &pos);
+                    // Cap the count at the room left in outCodePoints so that a broken file or
+                    // some other bug can neither loop forever nor overflow the output buffer.
+                    int charCount = maxCodePointCount - wordPos;
+                    while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
+                        outCodePoints[++wordPos] = nextChar;
+                        nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+                                mDictRoot, &pos);
+                    }
+                }
+                *outUnigramProbability =
+                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
+                                &pos);
+                return ++wordPos;
+            }
+            // We need to skip past this PtNode, so skip any remaining code points after the
+            // first and possibly the probability.
+            if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
+                PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
+            }
+            if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+                PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+            }
+            // The fact that this PtNode has children is very important. Since we already know
+            // that this PtNode does not match, if it has no children we know it is irrelevant
+            // to what we are searching for.
+            const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags);
+            // We will write in `found' whether we have passed the children position we are
+            // searching for. For example if we search for "beer", the children of b are less
+            // than the address we are searching for and the children of c are greater. When we
+            // come here for c, we realize this is too big, and that we should descend b.
+            bool found;
+            if (hasChildren) {
+                int currentPos = pos;
+                // Here comes the tricky part. First, read the children position.
+                const int childrenPos = PatriciaTrieReadingUtils
+                        ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
+                if (childrenPos > ptNodePos) {
+                    // If the children pos is greater than the position, it means the previous
+                    // PtNode, which position is stored in lastCandidatePtNodePos, was the right
+                    // one.
+                    found = true;
+                } else if (1 >= ptNodeCount) {
+                    // However if we are on the LAST PtNode of this array, and we have NOT shot the
+                    // position we should descend THIS PtNode. So we trick the
+                    // lastCandidatePtNodePos so that we will descend this PtNode, not the previous
+                    // one.
+                    lastCandidatePtNodePos = startPos;
+                    found = true;
+                } else {
+                    // Else, we should continue looking.
+                    found = false;
+                }
+            } else {
+                // Even if we don't have children here, we could still be on the last PtNode of
+                // this array. If this is the case, we should descend the last PtNode that had
+                // children, and their position is already in lastCandidatePtNodePos.
+                found = (1 >= ptNodeCount);
+            }
+
+            if (found) {
+                // Okay, we found the PtNode we should descend. Its position is in
+                // the lastCandidatePtNodePos variable, so we just re-read it.
+                if (0 != lastCandidatePtNodePos) {
+                    const PatriciaTrieReadingUtils::NodeFlags lastFlags =
+                            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
+                                    mDictRoot, &lastCandidatePtNodePos);
+                    const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+                            mDictRoot, &lastCandidatePtNodePos);
+                    // We copy all the characters in this PtNode to the buffer
+                    outCodePoints[wordPos] = lastChar;
+                    if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
+                        int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+                                mDictRoot, &lastCandidatePtNodePos);
+                        int charCount = maxCodePointCount - wordPos;  // Room left in the buffer.
+                        while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
+                            outCodePoints[++wordPos] = nextChar;
+                            nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+                                    mDictRoot, &lastCandidatePtNodePos);
+                        }
+                    }
+                    ++wordPos;
+                    // Now we only need to branch to the children address. Skip the probability if
+                    // it's there, read pos, and break to resume the search at pos.
+                    if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) {
+                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
+                                &lastCandidatePtNodePos);
+                    }
+                    pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+                            mDictRoot, lastFlags, &lastCandidatePtNodePos);
+                    break;
+                } else {
+                    // Here is a little tricky part: we come here if we found out that all children
+                    // addresses in this PtNode are bigger than the address we are searching for.
+                    // Should we conclude the word is not in the dictionary? No! It could still be
+                    // one of the remaining PtNodes in this array, so we have to keep looking in
+                    // this array until we find it (or we realize it's not there either, in which
+                    // case it's actually not in the dictionary). Pass the end of this PtNode,
+                    // ready to start the next one.
+                    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+                        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+                                mDictRoot, flags, &pos);
+                    }
+                    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+                        mShortcutListPolicy.skipAllShortcuts(&pos);
+                    }
+                    if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+                        if (!mBigramListPolicy.skipAllBigrams(&pos)) {
+                            AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize,
+                                    pos);
+                            mIsCorrupted = true;
+                            ASSERT(false);
+                            *outUnigramProbability = NOT_A_PROBABILITY;
+                            return 0;
+                        }
+                    }
+                }
+            } else {
+                // If we did not find it, we should record the last children address for the next
+                // iteration.
+                if (hasChildren) lastCandidatePtNodePos = startPos;
+                // Now skip the end of this PtNode (children pos and the attributes if any) so that
+                // our pos is after the end of this PtNode, at the start of the next one.
+                if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+                    PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+                            mDictRoot, flags, &pos);
+                }
+                if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+                    mShortcutListPolicy.skipAllShortcuts(&pos);
+                }
+                if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+                    if (!mBigramListPolicy.skipAllBigrams(&pos)) {
+                        AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos);
+                        mIsCorrupted = true;
+                        ASSERT(false);
+                        *outUnigramProbability = NOT_A_PROBABILITY;
+                        return 0;
+                    }
+                }
+            }
+
+        }
+    }
+    // If we have looked through all the PtNodes and found no match, the ptNodePos is
+    // not the position of a terminal in this dictionary.
+    return 0;
+}
+
+// Returns the position of the terminal PtNode of the word that exactly matches inWord, or
+// NOT_A_DICT_POS when there is no such word. A read error marks the dictionary as corrupted.
+int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
+        const int length, const bool forceLowerCaseSearch) const {
+    DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
+    readingHelper.initWithPtNodeArrayPos(getRootPosition());
+    const int ptNodePos =
+            readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
+    if (readingHelper.isError()) {
+        mIsCorrupted = true;
+        AKLOGE("Dictionary reading error in getTerminalPtNodePositionOfWord().");
+    }
+    return ptNodePos;
+}
+
+int PatriciaTriePolicy::getProbability(const int unigramProbability,
+        const int bigramProbability) const {
+    // Bigram probabilities are stored approximately to save space: the lower the unigram
+    // probability, the coarser the precision. The theoretical maximum error in the result is 8,
+    // though in practice it stays within 3 or 4, so bigrams may occasionally swap order.
+    if (NOT_A_PROBABILITY == unigramProbability) {
+        // Without a unigram probability there is nothing to combine.
+        return NOT_A_PROBABILITY;
+    }
+    // No bigram probability: back off from the unigram probability alone.
+    if (NOT_A_PROBABILITY == bigramProbability) {
+        return ProbabilityUtils::backoff(unigramProbability);
+    }
+    return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, bigramProbability);
+}
+
+int PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
+        const int ptNodePos) const {
+    if (NOT_A_DICT_POS == ptNodePos) {
+        return NOT_A_PROBABILITY;
+    }
+    const PtNodeParams targetParams =
+            mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+    // Entries that are not words, or that are blacklisted, only matter to the suggestion
+    // process (where they serve shortcuts); everywhere else they carry no probability.
+    if (targetParams.isNotAWord() || targetParams.isBlacklisted()) {
+        return NOT_A_PROBABILITY;
+    }
+    if (!prevWordsPtNodePos) {
+        return getProbability(targetParams.getProbability(), NOT_A_PROBABILITY);
+    }
+    // Scan the bigram list of prevWordsPtNodePos[0] for a usable entry pointing at ptNodePos.
+    BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy,
+            getBigramsPositionOfPtNode(prevWordsPtNodePos[0]));
+    while (bigramsIt.hasNext()) {
+        bigramsIt.next();
+        const bool isTarget = (bigramsIt.getBigramPos() == ptNodePos);
+        if (isTarget && NOT_A_PROBABILITY != bigramsIt.getProbability()) {
+            return getProbability(targetParams.getProbability(), bigramsIt.getProbability());
+        }
+    }
+    return NOT_A_PROBABILITY;
+}
+
+// Reports every bigram entry of prevWordsPtNodePos[0] to listener; does nothing without it.
+void PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
+        NgramListener *const listener) const {
+    if (nullptr == prevWordsPtNodePos) return;
+    BinaryDictionaryBigramsIterator entryIt(&mBigramListPolicy,
+            getBigramsPositionOfPtNode(prevWordsPtNodePos[0]));
+    while (entryIt.hasNext()) {
+        entryIt.next();
+        const int entryProbability = entryIt.getProbability();
+        listener->onVisitEntry(entryProbability, entryIt.getBigramPos());
+    }
+}
+
+// Returns the shortcut list position recorded in the PtNode at ptNodePos; NOT_A_DICT_POS is
+// passed straight through without reading the buffer.
+int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+    return (NOT_A_DICT_POS == ptNodePos) ? NOT_A_DICT_POS
+            : mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos();
+}
+
+// Returns the bigram list position recorded in the PtNode at ptNodePos, or NOT_A_DICT_POS
+// when ptNodePos itself is NOT_A_DICT_POS.
+int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+    return (NOT_A_DICT_POS == ptNodePos) ? NOT_A_DICT_POS
+            : mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getBigramsPos();
+}
+
+int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
+        const int ptNodePos, DicNodeVector *childDicNodes) const {
+    // Fields of the PtNode at ptNodePos, filled in by readPtNodeInfo() below.
+    PatriciaTrieReadingUtils::NodeFlags nodeFlags;
+    int codePoints[MAX_WORD_LENGTH];
+    int codePointCount = 0;
+    int nodeProbability = NOT_A_PROBABILITY;
+    int childrenPos = NOT_A_DICT_POS, shortcutPos = NOT_A_DICT_POS;
+    int bigramPos = NOT_A_DICT_POS, nextSiblingPos = NOT_A_DICT_POS;
+    PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
+            &mBigramListPolicy, &nodeFlags, &codePointCount, codePoints, &nodeProbability,
+            &childrenPos, &shortcutPos, &bigramPos, &nextSiblingPos);
+    // PtNodes that do not start with a Unicode code point hold non-word information: skip them.
+    if (!CharUtils::isInUnicodeSpace(codePoints[0])) {
+        return nextSiblingPos;
+    }
+    const bool isTerminal = PatriciaTrieReadingUtils::isTerminal(nodeFlags);
+    const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(nodeFlags);
+    const bool isBlacklistedOrNotAWord = PatriciaTrieReadingUtils::isBlacklisted(nodeFlags)
+            || PatriciaTrieReadingUtils::isNotAWord(nodeFlags);
+    childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, nodeProbability, isTerminal,
+            hasChildren, isBlacklistedOrNotAWord, codePointCount, codePoints);
+    return nextSiblingPos;
+}
+
+const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints,
+        const int codePointCount) const {  // Describes one word stored in this dictionary.
+    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
+            false /* forceLowerCaseSearch */);
+    if (ptNodePos == NOT_A_DICT_POS) {  // No exact match: return a default WordProperty.
+        AKLOGE("getWordProperty was called for invalid word.");
+        return WordProperty();
+    }
+    const PtNodeParams ptNodeParams =
+            mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+    std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
+            ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
+    // Collect every bigram entry of this word as a BigramProperty.
+    std::vector<BigramProperty> bigrams;
+    const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
+    int bigramWord1CodePoints[MAX_WORD_LENGTH];
+    BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos);
+    while (bigramsIt.hasNext()) {
+        // Fetch the next bigram entry; this also moves the iterator forward.
+        bigramsIt.next();
+        // Skip the entry if the entry has been deleted. This never happens for ver2 dicts.
+        if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) {
+            int word1Probability = NOT_A_PROBABILITY;
+            const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+                    bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints,
+                    &word1Probability);
+            const std::vector<int> word1(bigramWord1CodePoints,
+                    bigramWord1CodePoints + word1CodePointCount);
+            const int probability = getProbability(word1Probability, bigramsIt.getProbability());
+            bigrams.emplace_back(&word1, probability,
+                    NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */);
+        }
+    }
+    // Collect every shortcut target of this word, if it has a shortcut list.
+    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
+    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
+    if (shortcutPos != NOT_A_DICT_POS) {
+        int shortcutTargetCodePoints[MAX_WORD_LENGTH];
+        ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos);
+        bool hasNext = true;  // The first entry is read unconditionally.
+        while (hasNext) {
+            const ShortcutListReadingUtils::ShortcutFlags shortcutFlags =
+                    ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos);
+            hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
+            const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget(
+                    mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos);
+            const std::vector<int> shortcutTarget(shortcutTargetCodePoints,
+                    shortcutTargetCodePoints + shortcutTargetLength);
+            const int shortcutProbability =
+                    ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags);
+            shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
+        }
+    }
+    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+            NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+    return WordProperty(&codePointVector, &unigramProperty, &bigrams);
+}
+
+int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+        int *const outCodePointCount) {
+    // Writes the word for `token' and returns the token of the next word, or 0 at the end.
+    *outCodePointCount = 0;
+    if (0 == token) {
+        // Token 0 (re)starts the iteration: collect every terminal PtNode position up front.
+        mTerminalPtNodePositionsForIteratingWords.clear();
+        DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
+                &mTerminalPtNodePositionsForIteratingWords);
+        DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
+        readingHelper.initWithPtNodeArrayPos(getRootPosition());
+        readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
+    }
+    const int wordCount = static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
+    if (token < 0 || token >= wordCount) {
+        AKLOGE("Given token %d is invalid.", token);
+        return 0;
+    }
+    // Only the code points are reported to the caller; the probability is read and dropped.
+    int ignoredProbability = NOT_A_PROBABILITY;
+    *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+            mTerminalPtNodePositionsForIteratingWords[token], MAX_WORD_LENGTH, outCodePoints,
+            &ignoredProbability);
+    if (token + 1 < wordCount) {
+        return token + 1;
+    }
+    // That was the last word: drop the cached positions and report the end with 0.
+    mTerminalPtNodePositionsForIteratingWords.clear();
+    return 0;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
new file mode 100644
index 0000000..5b2e872
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
+#define LATINIME_PATRICIA_TRIE_POLICY_H
+
+#include <cstdint>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+
+class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {  // Read-only ver2 trie.
+ public:
+    PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
+            : mMmappedBuffer(std::move(mmappedBuffer)),
+              mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
+                      FormatUtils::VERSION_2),
+              mDictRoot(mMmappedBuffer->getReadOnlyByteArrayView().data()
+                      + mHeaderPolicy.getSize()),  // The trie body starts right after the header.
+              mDictBufferSize(mMmappedBuffer->getReadOnlyByteArrayView().size()
+                      - mHeaderPolicy.getSize()),
+              mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
+              mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
+              mPtNodeArrayReader(mDictRoot, mDictBufferSize),
+              mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
+    // Positions are relative to mDictRoot, so the root PtNode array is at position 0.
+    AK_FORCE_INLINE int getRootPosition() const {
+        return 0;
+    }
+    // Adds the child PtNodes of dicNode to childDicNodes.
+    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
+            DicNodeVector *const childDicNodes) const;
+    // Writes the word whose terminal PtNode is at terminalNodePos; returns its length or 0.
+    int getCodePointsAndProbabilityAndReturnCodePointCount(
+            const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
+            int *const outUnigramProbability) const;
+    // Returns the terminal PtNode position of inWord, or NOT_A_DICT_POS if it is not found.
+    int getTerminalPtNodePositionOfWord(const int *const inWord,
+            const int length, const bool forceLowerCaseSearch) const;
+    // Combines the two probabilities; either may be NOT_A_PROBABILITY.
+    int getProbability(const int unigramProbability, const int bigramProbability) const;
+    // Probability of the word at ptNodePos, in the context of prevWordsPtNodePos if non-null.
+    int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
+    // Calls listener->onVisitEntry() for each bigram entry of prevWordsPtNodePos[0].
+    void iterateNgramEntries(const int *const prevWordsPtNodePos,
+            NgramListener *const listener) const;
+    // Returns the shortcut position of the PtNode at ptNodePos, or NOT_A_DICT_POS.
+    int getShortcutPositionOfPtNode(const int ptNodePos) const;
+
+    const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+        return &mHeaderPolicy;
+    }
+
+    const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+        return &mShortcutListPolicy;
+    }
+    // The mutating and flushing methods below are unsupported here: they only log and fail.
+    bool addUnigramEntry(const int *const word, const int length,
+            const UnigramProperty *const unigramProperty) {
+        // This method should not be called for non-updatable dictionary.
+        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+
+    bool removeUnigramEntry(const int *const word, const int length) {
+        // This method should not be called for non-updatable dictionary.
+        AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+
+    bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+            const BigramProperty *const bigramProperty) {
+        // This method should not be called for non-updatable dictionary.
+        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+
+    bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
+            const int length) {
+        // This method should not be called for non-updatable dictionary.
+        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+
+    bool flush(const char *const filePath) {
+        // This method should not be called for non-updatable dictionary.
+        AKLOGI("Warning: flush() is called for non-updatable dictionary.");
+        return false;
+    }
+
+    bool flushWithGC(const char *const filePath) {
+        // This method should not be called for non-updatable dictionary.
+        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+        return false;
+    }
+
+    bool needsToRunGC(const bool mindsBlockByGC) const {
+        // This method should not be called for non-updatable dictionary.
+        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+        return false;
+    }
+
+    void getProperty(const char *const query, const int queryLength, char *const outResult,
+            const int maxResultLength) {
+        // getProperty is not supported for this class: report an empty string if there is room.
+        if (maxResultLength > 0) {
+            outResult[0] = '\0';
+        }
+    }
+    // Returns the properties of the given word, or a default WordProperty if it is not found.
+    const WordProperty getWordProperty(const int *const codePoints,
+            const int codePointCount) const;
+    // Word iteration: pass 0 to start, then each returned token; 0 is returned at the end.
+    int getNextWordAndNextToken(const int token, int *const outCodePoints,
+            int *const outCodePointCount);
+    // True once any reader in this class has detected invalid dictionary data.
+    bool isCorrupted() const {
+        return mIsCorrupted;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
+    // Declaration order matters: the constructor initializes later members from earlier ones.
+    const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
+    const HeaderPolicy mHeaderPolicy;
+    const uint8_t *const mDictRoot;  // Start of the trie body inside mMmappedBuffer.
+    const int mDictBufferSize;  // Buffer size minus the header size.
+    const BigramListPolicy mBigramListPolicy;
+    const ShortcutListPolicy mShortcutListPolicy;
+    const Ver2ParticiaTrieNodeReader mPtNodeReader;
+    const Ver2PtNodeArrayReader mPtNodeArrayReader;
+    std::vector<int> mTerminalPtNodePositionsForIteratingWords;  // Word iteration state.
+    mutable bool mIsCorrupted;  // mutable so that const readers can record corruption.
+
+    int getBigramsPositionOfPtNode(const int ptNodePos) const;
+    int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
+            DicNodeVector *const childDicNodes) const;
+};
+} // namespace latinime
+#endif // LATINIME_PATRICIA_TRIE_POLICY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h
new file mode 100644
index 0000000..e91c10b
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_LIST_POLICY_H
+#define LATINIME_SHORTCUT_LIST_POLICY_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
+
+namespace latinime {
+
+class ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+    explicit ShortcutListPolicy(const uint8_t *const shortcutBuf) : mShortcutsBuf(shortcutBuf) {}
+
+    ~ShortcutListPolicy() {}
+
+    // Returns pos as moved by getShortcutListSizeAndForwardPointer(); NOT_A_DICT_POS stays as is.
+    int getStartPos(const int pos) const {
+        if (pos == NOT_A_DICT_POS) {
+            return NOT_A_DICT_POS;
+        }
+        int listPos = pos;
+        ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mShortcutsBuf, &listPos);
+        return listPos;
+    }
+
+    // Reads the shortcut flags at *pos and fills whichever out-params are non-null. The target
+    // is read only when both outCodePoint and outCodePointCount are non-null.
+    void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+            int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+            int *const pos) const {
+        const auto flags = ShortcutListReadingUtils::getFlagsAndForwardPointer(mShortcutsBuf, pos);
+        if (outHasNext) {
+            *outHasNext = ShortcutListReadingUtils::hasNext(flags);
+        }
+        if (outIsWhitelist) {
+            *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
+        }
+        if (outCodePoint && outCodePointCount) {
+            *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget(
+                    mShortcutsBuf, maxCodePointCount, outCodePoint, pos);
+        }
+    }
+
+    void skipAllShortcuts(int *const pos) const {
+        const int shortcutListSize = ShortcutListReadingUtils
+                ::getShortcutListSizeAndForwardPointer(mShortcutsBuf, pos);
+        *pos += shortcutListSize;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListPolicy);
+    const uint8_t *const mShortcutsBuf;
+};
+} // namespace latinime
+#endif // LATINIME_SHORTCUT_LIST_POLICY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
new file mode 100644
index 0000000..74458fa
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+const PtNodeParams Ver2ParticiaTrieNodeReader::fetchPtNodeParamsInBufferFromPtNodePos(
+        const int ptNodePos) const {
+    if (!(0 <= ptNodePos && ptNodePos < mDictSize)) {
+        // Outside the buffer (a bug or a broken dictionary): log it and return empty params.
+        AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+                ptNodePos, mDictSize);
+        ASSERT(false);
+        return PtNodeParams();
+    }
+    int siblingPos = NOT_A_DICT_POS;
+    int bigramPos = NOT_A_DICT_POS;
+    int shortcutPos = NOT_A_DICT_POS;
+    int childrenPos = NOT_A_DICT_POS;
+    int probability = NOT_A_PROBABILITY;
+    int codePoints[MAX_WORD_LENGTH];
+    int codePointCount = 0;
+    PatriciaTrieReadingUtils::NodeFlags nodeFlags;
+    PatriciaTrieReadingUtils::readPtNodeInfo(mDictBuffer, ptNodePos, mShortuctPolicy,
+            mBigramPolicy, &nodeFlags, &codePointCount, codePoints, &probability, &childrenPos,
+            &shortcutPos, &bigramPos, &siblingPos);
+    if (codePointCount < 1) {
+        AKLOGE("Empty PtNode is not allowed. Code point count: %d", codePointCount);
+        ASSERT(false);
+        return PtNodeParams();
+    }
+    return PtNodeParams(ptNodePos, nodeFlags, codePointCount, codePoints, probability,
+            childrenPos, shortcutPos, bigramPos, siblingPos);
+}
+
+}
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
new file mode 100644
index 0000000..4e80a7a
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+class Ver2ParticiaTrieNodeReader : public PtNodeReader {
+ public:
+    Ver2ParticiaTrieNodeReader(const uint8_t *const dictBuffer, const int dictSize,
+            const DictionaryBigramsStructurePolicy *const bigramPolicy,
+            const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
+            : mDictBuffer(dictBuffer), mDictSize(dictSize), mBigramPolicy(bigramPolicy),
+              mShortuctPolicy(shortcutPolicy) {}
+    // Returns the params of the PtNode at ptNodePos, or empty PtNodeParams() if it is invalid.
+    virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const;
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver2ParticiaTrieNodeReader);
+    // Stored exactly as passed to the constructor; this class declares no destructor for them.
+    const uint8_t *const mDictBuffer;
+    const int mDictSize;
+    const DictionaryBigramsStructurePolicy *const mBigramPolicy;
+    const DictionaryShortcutsStructurePolicy *const mShortuctPolicy;
+};
+} // namespace latinime
+#endif /* LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp
new file mode 100644
index 0000000..ebea0c1
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+        int *const outPtNodeCount, int *const outFirstPtNodePos) const {
+    if (!(0 <= ptNodeArrayPos && ptNodeArrayPos < mDictSize)) {
+        // The position lies outside the dictionary (a bug or a broken dictionary): reject it.
+        AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
+                ptNodeArrayPos, mDictSize);
+        ASSERT(false);
+        return false;
+    }
+    // Reading the array size advances the cursor; it is then reported as the first PtNode pos.
+    int cursor = ptNodeArrayPos;
+    *outPtNodeCount =
+            PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictBuffer, &cursor);
+    *outFirstPtNodePos = cursor;
+    return true;
+}
+
+bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+        int *const outNextPtNodeArrayPos) const {
+    if (0 <= forwordLinkPos && forwordLinkPos < mDictSize) {
+        // Nothing is read: Ver2 dicts don't have forward links, so no next array is reported.
+        *outNextPtNodeArrayPos = NOT_A_DICT_POS;
+        return true;
+    }
+    // Invalid position, caused by a bug or a broken dictionary.
+    AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
+            forwordLinkPos, mDictSize);
+    ASSERT(false);
+    return false;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h
new file mode 100644
index 0000000..a8f3655
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H
+#define LATINIME_VER2_PT_NODE_ARRAY_READER_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
+
+namespace latinime {
+
+class Ver2PtNodeArrayReader : public PtNodeArrayReader {
+ public:
+    Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize)
+            : mDictBuffer(dictBuffer), mDictSize(dictSize) {}
+    // Both readers log an error and return false for a position outside [0, dictSize).
+    virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+            int *const outPtNodeCount, int *const outFirstPtNodePos) const;
+    virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+            int *const outNextPtNodeArrayPos) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader);
+
+    const uint8_t *const mDictBuffer;
+    const int mDictSize;
+};
+} // namespace latinime
+#endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
new file mode 100644
index 0000000..91f0f35
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+
+#include "third_party/android_prediction/suggest/core/dictionary/property/bigram_property.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
+        bool *const outHasNext, int *const bigramEntryPos) const {
+    // Every out-param is optional; the entry itself is always read through bigramEntryPos.
+    const BigramEntry entry = mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
+    if (outBigramPos) {
+        // Translate the target's terminal id into its PtNode position.
+        *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
+                entry.getTargetTerminalId());
+    }
+    if (outProbability) {
+        if (!entry.hasHistoricalInfo()) {
+            *outProbability = entry.getProbability();
+        } else {
+            // Entries carrying historical info have their probability decoded from it.
+            *outProbability = ForgettingCurveUtils::decodeProbability(entry.getHistoricalInfo(),
+                    mHeaderPolicy);
+        }
+    }
+    if (outHasNext) {
+        *outHasNext = entry.hasNext();
+    }
+}
+
+bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
+        const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
+    // Adds the bigram terminalId -> newTargetTerminalId; false is returned when a step fails.
+    // Case 1: no bigram list yet. Case 2: the target is already listed and is overwritten.
+    // Case 3: an invalid entry is reclaimed. Case 4: the entry goes to the content tail and the
+    // old list tail is linked to it. Case 5: like 4, but written over the old tail, no link.
+    // outAddedNewEntry is optional; it ends up true in every case except case 2.
+    if (outAddedNewEntry) {
+        *outAddedNewEntry = false;
+    }
+    const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+    if (bigramListPos == NOT_A_DICT_POS) {
+        // Case 1. PtNode that doesn't have a bigram list.
+        // Create new bigram list.
+        if (!mBigramDictContent->createNewBigramList(terminalId)) {
+            return false;
+        }
+        const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+                newTargetTerminalId);
+        const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
+                bigramProperty);
+        // Write an entry.
+        int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+        if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
+                &writingPos)) {
+            AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
+            return false;
+        }
+        if (!mBigramDictContent->writeTerminator(writingPos)) {
+            AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
+            return false;
+        }
+        if (outAddedNewEntry) {
+            *outAddedNewEntry = true;
+        }
+        return true;
+    }
+
+    int tailEntryPos = NOT_A_DICT_POS;
+    const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
+            &tailEntryPos);
+    if (entryPosToUpdate == NOT_A_DICT_POS) {
+        // Case 4, 5. Add new entry to the bigram list.
+        const int contentTailPos = mBigramDictContent->getContentTailPos();
+        // If the tail entry is at the tail of content buffer, the new entry can be written without
+        // link (Case 5).
+        const bool canAppendEntry =
+                contentTailPos == tailEntryPos + mBigramDictContent->getBigramEntrySize();
+        const int newEntryPos = canAppendEntry ? tailEntryPos : contentTailPos;
+        int writingPos = newEntryPos;
+        // Write new entry at the tail position of the bigram content.
+        const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+                newTargetTerminalId);
+        const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
+                &newBigramEntry, bigramProperty);
+        if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
+                &writingPos)) {
+            AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
+            return false;
+        }
+        if (!mBigramDictContent->writeTerminator(writingPos)) {
+            AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
+            return false;
+        }
+        if (!canAppendEntry) {
+            // Update link of the current tail entry.
+            if (!mBigramDictContent->writeLink(newEntryPos, tailEntryPos)) {
+                AKLOGE("Cannot update bigram entry link. pos: %d, linked entry pos: %d.",
+                        tailEntryPos, newEntryPos);
+                return false;
+            }
+        }
+        if (outAddedNewEntry) {
+            *outAddedNewEntry = true;
+        }
+        return true;
+    }
+
+    // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
+    const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
+    if (!originalBigramEntry.isValid()) {
+        // Case 3. Reuse the existing invalid entry, which counts as adding a new one. In case 2
+        // (a valid entry is updated) outAddedNewEntry keeps the false set at the top.
+        if (outAddedNewEntry) {
+            *outAddedNewEntry = true;
+        }
+    }
+    const BigramEntry updatedBigramEntry =
+            originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
+    const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
+            &updatedBigramEntry, bigramProperty);
+    return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
+}
+
+bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
+    const int listHeadPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+    if (NOT_A_DICT_POS == listHeadPos) {
+        // terminalId owns no bigram list, so there is nothing to remove.
+        return false;
+    }
+    const int foundPos =
+            getEntryPosToUpdate(targetTerminalId, listHeadPos, nullptr /* outTailEntryPos */);
+    if (NOT_A_DICT_POS == foundPos) {
+        // Neither the target nor a reusable invalid entry is in the list.
+        return false;
+    }
+    const BigramEntry foundEntry = mBigramDictContent->getBigramEntry(foundPos);
+    if (foundEntry.getTargetTerminalId() != targetTerminalId) {
+        // The search handed back a reusable invalid slot instead of the target entry.
+        return false;
+    }
+    // Deletion is done in place: the slot is overwritten with an invalidated copy of the entry.
+    const BigramEntry invalidatedEntry = foundEntry.getInvalidatedEntry();
+    return mBigramDictContent->writeBigramEntry(&invalidatedEntry, foundPos);
+}
+
+bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+        int *const outBigramCount) {
+    const int listHeadPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+    if (NOT_A_DICT_POS == listHeadPos) {
+        // No bigram list for terminalId: nothing to update, and that is not a failure.
+        return true;
+    }
+    int cursor = listHeadPos;
+    bool hasNext = true;
+    do {
+        const BigramEntry entry = mBigramDictContent->getBigramEntryAndAdvancePosition(&cursor);
+        const int entryPos = cursor - mBigramDictContent->getBigramEntrySize();
+        hasNext = entry.hasNext();
+        if (!entry.isValid()) {
+            continue;
+        }
+        // Valid entries are either invalidated, refreshed, or simply counted as they are.
+        const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
+                entry.getTargetTerminalId());
+        if (NOT_A_DICT_POS == targetPtNodePos) {
+            // The target has no PtNode position, so the bigram entry is invalidated.
+            const BigramEntry invalidatedEntry = entry.getInvalidatedEntry();
+            if (!mBigramDictContent->writeBigramEntry(&invalidatedEntry, entryPos)) {
+                return false;
+            }
+        } else if (!entry.hasHistoricalInfo()) {
+            *outBigramCount += 1;
+        } else {
+            const HistoricalInfo refreshedInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+                    entry.getHistoricalInfo(), mHeaderPolicy);
+            if (!ForgettingCurveUtils::needsToKeep(&refreshedInfo, mHeaderPolicy)) {
+                // Not worth keeping any more: remove the entry by invalidating it.
+                const BigramEntry invalidatedEntry = entry.getInvalidatedEntry();
+                if (!mBigramDictContent->writeBigramEntry(&invalidatedEntry, entryPos)) {
+                    return false;
+                }
+            } else {
+                const BigramEntry refreshedEntry =
+                        entry.updateHistoricalInfoAndGetEntry(&refreshedInfo);
+                if (!mBigramDictContent->writeBigramEntry(&refreshedEntry, entryPos)) {
+                    return false;
+                }
+                *outBigramCount += 1;
+            }
+        }
+    } while (hasNext);
+    return true;
+}
+
+int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
+    const int listHeadPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+    if (NOT_A_DICT_POS == listHeadPos) {
+        // Without a bigram list there are no entries to count.
+        return 0;
+    }
+    // Walk the list until an entry reports no successor, tallying only the valid entries.
+    int validEntryCount = 0;
+    int cursor = listHeadPos;
+    bool hasNext = true;
+    do {
+        const BigramEntry entry = mBigramDictContent->getBigramEntryAndAdvancePosition(&cursor);
+        hasNext = entry.hasNext();
+        if (entry.isValid()) {
+            ++validEntryCount;
+        }
+    } while (hasNext);
+    return validEntryCount;
+}
+
+int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
+        const int bigramListPos, int *const outTailEntryPos) const {
+    if (outTailEntryPos) {
+        *outTailEntryPos = NOT_A_DICT_POS;
+    }
+    int reusableEntryPos = NOT_A_DICT_POS;
+    int cursor = bigramListPos;
+    for (;;) {
+        const BigramEntry entry = mBigramDictContent->getBigramEntryAndAdvancePosition(&cursor);
+        const int entryPos = cursor - mBigramDictContent->getBigramEntrySize();
+        if (!entry.hasNext()) {
+            // End of list: report the tail and fall back to the reusable entry seen, if any.
+            if (outTailEntryPos) {
+                *outTailEntryPos = entryPos;
+            }
+            return reusableEntryPos;
+        }
+        if (entry.getTargetTerminalId() == targetTerminalIdToFind) {
+            // An entry that already points at the target wins immediately.
+            return entryPos;
+        }
+        if (!entry.isValid()) {
+            // Remember the latest invalid entry; it is a slot that can be reused.
+            reusableEntryPos = entryPos;
+        }
+    }
+}
+
+const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
+        const BigramEntry *const originalBigramEntry,
+        const BigramProperty *const bigramProperty) const {
+    // TODO: Consolidate historical info and probability.
+    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
+        // Dictionaries without historical info only take the property's probability.
+        return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
+    }
+    // Otherwise build updated historical info from the property's timestamp, level and count.
+    const HistoricalInfo propertyInfo(bigramProperty->getTimestamp(), bigramProperty->getLevel(),
+            bigramProperty->getCount());
+    const HistoricalInfo mergedInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
+            originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
+            &propertyInfo, mHeaderPolicy);
+    return originalBigramEntry->updateHistoricalInfoAndGetEntry(&mergedInfo);
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
new file mode 100644
index 0000000..f348638
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_BIGRAM_LIST_POLICY_H
+#define LATINIME_VER4_BIGRAM_LIST_POLICY_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
+
+namespace latinime {
+
+class BigramDictContent;
+class BigramProperty;
+class HeaderPolicy;
+class TerminalPositionLookupTable;
+
+class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
+ public:
+    Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
+            const TerminalPositionLookupTable *const terminalPositionLookupTable,
+            const HeaderPolicy *const headerPolicy)
+            : mBigramDictContent(bigramDictContent),
+              mTerminalPositionLookupTable(terminalPositionLookupTable),
+              mHeaderPolicy(headerPolicy) {}
+    // Reads the entry at *bigramEntryPos, advancing it; null out-params are simply skipped.
+    void getNextBigram(int *const outBigramPos, int *const outProbability,
+            bool *const outHasNext, int *const bigramEntryPos) const;
+
+    bool skipAllBigrams(int *const pos) const {
+        // Nothing to skip in ver4 dictionaries: *pos is left untouched and true is returned.
+        return true;
+    }
+    // Adds or updates a bigram of terminalId; outAddedNewEntry (optional) reports an addition.
+    bool addNewEntry(const int terminalId, const int newTargetTerminalId,
+            const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
+    // Invalidates terminalId's entry for targetTerminalId; false if absent or the write fails.
+    bool removeEntry(const int terminalId, const int targetTerminalId);
+    // Rewrites or invalidates entries as needed and adds the surviving ones to *outBigramCount.
+    bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+            int *const outBigramCount);
+    // Returns the number of valid entries in terminalId's bigram list (0 without a list).
+    int getBigramEntryConut(const int terminalId);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
+    // Finds the entry for the target, else a reusable invalid entry, else NOT_A_DICT_POS.
+    int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
+            int *const outTailEntryPos) const;
+
+    const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
+            const BigramProperty *const bigramProperty) const;
+
+    BigramDictContent *const mBigramDictContent;
+    const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
+    const HeaderPolicy *const mHeaderPolicy;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
new file mode 100644
index 0000000..ffb3791
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const int BigramDictContent::INVALID_LINKED_ENTRY_POS = Ver4DictConstants::NOT_A_TERMINAL_ID;  // Link target of a list terminator.
+
+const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
+        int *const bigramEntryPos) const {
+    // Decodes the entry at *bigramEntryPos, following links. Returns an invalid entry with
+    // hasNext == false for an out-of-range position or the list terminator.
+    const BufferWithExtendableBuffer *const buffer = getContentBuffer();
+    const int entryEndPos = (*bigramEntryPos) + getBigramEntrySize();
+    const bool inBounds = *bigramEntryPos >= 0 && entryEndPos <= buffer->getTailPosition();
+    if (!inBounds) {
+        AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
+                "bufSize: %d", *bigramEntryPos, entryEndPos, buffer->getTailPosition());
+        ASSERT(false);
+        return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+                Ver4DictConstants::NOT_A_TERMINAL_ID);
+    }
+    const int flags = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
+    // Fields that this dictionary format does not store keep the defaults below.
+    int timestamp = NOT_A_TIMESTAMP;
+    int level = 0;
+    int count = 0;
+    int probability = NOT_A_PROBABILITY;
+    if (!mHasHistoricalInfo) {
+        probability = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
+    } else {
+        timestamp = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
+        level = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
+        count = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
+    }
+    int targetTerminalId = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
+    if (targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) {
+        targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    }
+    if ((flags & Ver4DictConstants::BIGRAM_IS_LINK_MASK) != 0) {
+        // In a link entry the target field carries the position of the linked entry.
+        if (targetTerminalId == INVALID_LINKED_ENTRY_POS) {
+            // Bigram list terminator is found.
+            return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+                    Ver4DictConstants::NOT_A_TERMINAL_ID);
+        }
+        *bigramEntryPos = targetTerminalId;
+        return getBigramEntryAndAdvancePosition(bigramEntryPos);
+    }
+    // hasNext is always true here: reading continues until the terminator is found.
+    if (!mHasHistoricalInfo) {
+        return BigramEntry(true /* hasNext */, probability, targetTerminalId);
+    }
+    const HistoricalInfo historicalInfo(timestamp, level, count);
+    return BigramEntry(true /* hasNext */, probability, &historicalInfo, targetTerminalId);
+}
+
+bool BigramDictContent::writeBigramEntryAndAdvancePosition(
+        const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
+    // Serializes a regular (non-link) entry at *entryWritingPos; returns the write result.
+    const HistoricalInfo *const info = bigramEntryToWrite->getHistoricalInfo();
+    const int probability = bigramEntryToWrite->getProbability();
+    const int targetId = bigramEntryToWrite->getTargetTerminalId();
+    return writeBigramEntryAttributesAndAdvancePosition(false /* isLink */, probability, targetId,
+            info->getTimeStamp(), info->getLevel(), info->getCount(), entryWritingPos);
+}
+
+bool BigramDictContent::writeBigramEntryAttributesAndAdvancePosition(
+        const bool isLink, const int probability, const int targetTerminalId,
+        const int timestamp, const int level, const int count, int *const entryWritingPos) {
+    // Writes flags, then historical info or probability, then the target id; false on failure.
+    BufferWithExtendableBuffer *const buffer = getWritableContentBuffer();
+    const int flags = !isLink ? 0 : Ver4DictConstants::BIGRAM_IS_LINK_MASK;
+    if (!buffer->writeUintAndAdvancePosition(flags,
+            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
+        AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, flags);
+        return false;
+    }
+    if (!mHasHistoricalInfo) {
+        if (!buffer->writeUintAndAdvancePosition(probability,
+                Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
+                    probability);
+            return false;
+        }
+    } else {
+        if (!buffer->writeUintAndAdvancePosition(timestamp,
+                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
+                    timestamp);
+            return false;
+        }
+        if (!buffer->writeUintAndAdvancePosition(level,
+                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos, level);
+            return false;
+        }
+        if (!buffer->writeUintAndAdvancePosition(count,
+                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos, count);
+            return false;
+        }
+    }
+    // NOT_A_TERMINAL_ID is written as INVALID_BIGRAM_TARGET_TERMINAL_ID.
+    const int encodedTargetId = (targetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID) ?
+            targetTerminalId : Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID;
+    if (!buffer->writeUintAndAdvancePosition(encodedTargetId,
+            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
+        AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
+                *entryWritingPos, targetTerminalId);
+        return false;
+    }
+    return true;
+}
+
+bool BigramDictContent::writeLink(const int linkedEntryPos, const int writingPos) {
+    // A link entry stores the linked position in the target terminal id field. The advanced
+    // position is discarded because writingPos is taken by value.
+    int scratchPos = writingPos;
+    return writeBigramEntryAttributesAndAdvancePosition(true /* isLink */,
+            NOT_A_PROBABILITY /* probability */, linkedEntryPos, NOT_A_TIMESTAMP, 0 /* level */,
+            0 /* count */, &scratchPos);
+}
+
+bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const BigramDictContent *const originalBigramDictContent,
+        int *const outBigramEntryCount) {
+    // For every (original id, new id) pair, copies the surviving bigrams of the original
+    // terminal into this content and registers the copied list under the new id.
+    // *outBigramEntryCount is increased by the number of copied entries.
+    for (const auto &idPair : *terminalIdMap) {
+        const int oldListPos = originalBigramDictContent->getBigramListHeadPos(idPair.first);
+        if (oldListPos == NOT_A_DICT_POS) {
+            // This terminal does not have a bigram list.
+            continue;
+        }
+        // The copied list is written at the current tail of this content.
+        const int newListPos = getContentBuffer()->getTailPosition();
+        int copiedCount = 0;
+        if (!runGCBigramList(oldListPos, originalBigramDictContent, newListPos, terminalIdMap,
+                &copiedCount)) {
+            AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
+                    oldListPos, newListPos);
+            return false;
+        }
+        if (copiedCount == 0) {
+            // All bigram entries are useless. This terminal does not have a bigram list.
+            continue;
+        }
+        *outBigramEntryCount += copiedCount;
+        if (!getUpdatableAddressLookupTable()->set(idPair.second, newListPos)) {
+            AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
+                    idPair.second, newListPos);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Copies the bigram list starting at bigramListPos in sourceBigramDictContent to toPos in this
+// content, dropping invalidated entries and entries whose target is absent from terminalIdMap.
+// Adds the number of copied entries to *outEntryCount; returns false if any write fails.
+bool BigramDictContent::runGCBigramList(const int bigramListPos,
+        const BigramDictContent *const sourceBigramDictContent, const int toPos,
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        int *const outEntryCount) {
+    int srcPos = bigramListPos;
+    int dstPos = toPos;
+    for (bool hasNext = true; hasNext; ) {
+        const BigramEntry srcEntry =
+                sourceBigramDictContent->getBigramEntryAndAdvancePosition(&srcPos);
+        hasNext = srcEntry.hasNext();
+        if (!srcEntry.isValid()) {
+            // Invalidated entries are not copied.
+            continue;
+        }
+        const auto newIdIt = terminalIdMap->find(srcEntry.getTargetTerminalId());
+        if (newIdIt == terminalIdMap->end()) {
+            // Target word has been removed.
+            continue;
+        }
+        const BigramEntry remappedEntry =
+                srcEntry.updateTargetTerminalIdAndGetEntry(newIdIt->second);
+        if (!writeBigramEntryAndAdvancePosition(&remappedEntry, &dstPos)) {
+            AKLOGE("Cannot write bigram entry to run GC. pos: %d", dstPos);
+            return false;
+        }
+        ++(*outEntryCount);
+    }
+    // The copied list is terminated unless the counter is still zero.
+    if (*outEntryCount > 0 && !writeTerminator(dstPos)) {
+        AKLOGE("Cannot write terminator to run GC. pos: %d", dstPos);
+        return false;
+    }
+    return true;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
new file mode 100644
index 0000000..48c20ba
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
+#define LATINIME_BIGRAM_DICT_CONTENT_H
+
+#include <cstdint>
+#include <cstdio>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+class BigramDictContent : public SparseTableDictContent {
+ public:
+    // Forwards the buffers to SparseTableDictContent; hasHistoricalInfo selects the entry layout.
+    BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo)
+            : SparseTableDictContent(buffers, bufferSizes,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+              mHasHistoricalInfo(hasHistoricalInfo) {}
+
+    BigramDictContent(const bool hasHistoricalInfo)
+            : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+              mHasHistoricalInfo(hasHistoricalInfo) {}
+
+    int getContentTailPos() const { return getContentBuffer()->getTailPosition(); }
+
+    // Reads the entry at bigramEntryPos without reporting the advanced position.
+    const BigramEntry getBigramEntry(const int bigramEntryPos) const {
+        int pos = bigramEntryPos;
+        return getBigramEntryAndAdvancePosition(&pos);
+    }
+
+    // Reads the entry at *bigramEntryPos and advances the position; follows link entries.
+    const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
+
+    // Returns head position of bigram list for a PtNode specified by terminalId, or
+    // NOT_A_DICT_POS when the address lookup table does not contain terminalId.
+    int getBigramListHeadPos(const int terminalId) const {
+        const SparseTable *const table = getAddressLookupTable();
+        return table->contains(terminalId) ? table->get(terminalId) : NOT_A_DICT_POS;
+    }
+
+    // Writes the entry at the current tail position of the content buffer.
+    bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
+        int tailPos = getContentBuffer()->getTailPosition();
+        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &tailPos);
+    }
+
+    // Writes the entry at entryWritingPos without reporting the advanced position.
+    bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
+        int pos = entryWritingPos;
+        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &pos);
+    }
+
+    // Writes a non-link entry at *entryWritingPos and advances the position.
+    bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
+            int *const entryWritingPos);
+
+    // Terminator is a link to the invalid position.
+    bool writeTerminator(const int writingPos) {
+        return writeLink(INVALID_LINKED_ENTRY_POS, writingPos);
+    }
+
+    // Writes a link entry at writingPos that points to linkedPos.
+    bool writeLink(const int linkedPos, const int writingPos);
+
+    // Registers the current tail position as the bigram list position of terminalId.
+    bool createNewBigramList(const int terminalId) {
+        const int listHeadPos = getContentBuffer()->getTailPosition();
+        return getUpdatableAddressLookupTable()->set(terminalId, listHeadPos);
+    }
+
+    bool flushToFile(FILE *const file) const { return flush(file); }
+
+    // Copies the bigram lists that survive the id remapping out of originalBigramDictContent.
+    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            const BigramDictContent *const originalBigramDictContent,
+            int *const outBigramEntryCount);
+
+    // Size of one entry: flags + (historical info fields or probability) + target terminal id.
+    int getBigramEntrySize() const {
+        const int payloadSize = mHasHistoricalInfo
+                ? (Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+                        + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+                        + Ver4DictConstants::WORD_COUNT_FIELD_SIZE)
+                : Ver4DictConstants::PROBABILITY_SIZE;
+        return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE + payloadSize
+                + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
+
+    static const int INVALID_LINKED_ENTRY_POS;
+
+    bool writeBigramEntryAttributesAndAdvancePosition(
+            const bool isLink, const int probability, const int targetTerminalId,
+            const int timestamp, const int level, const int count, int *const entryWritingPos);
+
+    bool runGCBigramList(const int bigramListPos,
+            const BigramDictContent *const sourceBigramDictContent, const int toPos,
+            const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            int *const outEntryCount);
+
+    bool mHasHistoricalInfo;
+};
+} // namespace latinime
+#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
new file mode 100644
index 0000000..b581e49
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_ENTRY_H
+#define LATINIME_BIGRAM_ENTRY_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/historical_info.h"
+
+namespace latinime {
+
+class BigramEntry {
+ public:
+    // Copies every field, including the historical info, so that a copy of an entry read
+    // from a dictionary with historical info keeps its timestamp, level and count.
+    BigramEntry(const BigramEntry& bigramEntry)
+            : mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
+              mHistoricalInfo(bigramEntry.mHistoricalInfo),
+              mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
+
+    // Entry without historical information.
+    BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
+            : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(),
+              mTargetTerminalId(targetTerminalId) {}
+
+    // Entry with historical information. *historicalInfo is copied and must not be null.
+    BigramEntry(const bool hasNext, const int probability,
+            const HistoricalInfo *const historicalInfo, const int targetTerminalId)
+            : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo),
+              mTargetTerminalId(targetTerminalId) {}
+
+    // The helpers below return a modified copy and leave this entry unchanged.
+    const BigramEntry getInvalidatedEntry() const {
+        return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
+    }
+
+    const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
+        return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId);
+    }
+
+    const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
+        return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId);
+    }
+
+    const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
+        return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId);
+    }
+
+    const BigramEntry updateHistoricalInfoAndGetEntry(
+            const HistoricalInfo *const historicalInfo) const {
+        return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId);
+    }
+
+    // An entry is valid as long as it refers to a target terminal.
+    bool isValid() const {
+        return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
+    }
+
+    bool hasNext() const { return mHasNext; }
+
+    int getProbability() const { return mProbability; }
+
+    bool hasHistoricalInfo() const {
+        return mHistoricalInfo.isValid();
+    }
+
+    // The returned pointer refers to a member of this entry.
+    const HistoricalInfo *getHistoricalInfo() const {
+        return &mHistoricalInfo;
+    }
+
+    int getTargetTerminalId() const { return mTargetTerminalId; }
+
+ private:
+    // Copy constructor is public to use this class as a type of return value.
+    DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
+    DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
+
+    const bool mHasNext;
+    const int mProbability;
+    const HistoricalInfo mHistoricalInfo;
+    const int mTargetTerminalId;
+};
+} // namespace latinime
+#endif /* LATINIME_BIGRAM_ENTRY_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
new file mode 100644
index 0000000..b792ec0
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
+
+namespace latinime {
+
+bool LanguageModelDictContent::save(FILE *const file) const {
+ return mTrieMap.save(file);  // The trie map's own result is returned unchanged.
+}
+
+bool LanguageModelDictContent::runGC(
+ const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const LanguageModelDictContent *const originalContent,
+ int *const outNgramCount) {  // outNgramCount may be null; runGCInner checks it before use.
+ return runGCInner(terminalIdMap, originalContent->mTrieMap.getEntriesInRootLevel(),
+ 0 /* nextLevelBitmapEntryIndex */, outNgramCount);  // NOTE(review): literal 0 is presumably getRootBitmapEntryIndex() - confirm.
+}
+
+ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
+        const WordIdArrayView prevWordIds, const int wordId) const {
+    // Looks wordId up in the trie level reached through prevWordIds. A default-constructed
+    // entry is returned when the context or the word itself is not found.
+    const int contextIndex = getBitmapEntryIndex(prevWordIds);
+    if (contextIndex != TrieMap::INVALID_INDEX) {
+        const TrieMap::Result lookup = mTrieMap.get(wordId, contextIndex);
+        if (lookup.mIsValid) {
+            return ProbabilityEntry::decode(lookup.mValue, mHasHistoricalInfo);
+        }
+    }
+    return ProbabilityEntry();
+}
+
+bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
+        const int terminalId, const ProbabilityEntry *const probabilityEntry) {
+    // Fails without touching the trie map when prevWordIds does not resolve to a trie level.
+    const int contextIndex = getBitmapEntryIndex(prevWordIds);
+    return contextIndex != TrieMap::INVALID_INDEX
+            && mTrieMap.put(terminalId, probabilityEntry->encode(mHasHistoricalInfo),
+                    contextIndex);
+}
+
+bool LanguageModelDictContent::runGCInner(
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const TrieMap::TrieMapRange trieMapRange,
+        const int nextLevelBitmapEntryIndex, int *const outNgramCount) {
+    // Re-inserts every entry of trieMapRange whose key still maps to a valid terminal id,
+    // keyed by the new id, and then recurses into the entry's next level if it has one.
+    for (auto &entry : trieMapRange) {
+        const auto newIdIt = terminalIdMap->find(entry.key());
+        if (newIdIt == terminalIdMap->end()
+                || newIdIt->second == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+            continue;  // The word has been removed.
+        }
+        if (!mTrieMap.put(newIdIt->second, entry.value(), nextLevelBitmapEntryIndex)) {
+            return false;
+        }
+        if (outNgramCount) {
+            ++(*outNgramCount);
+        }
+        if (entry.hasNextLevelMap() && !runGCInner(terminalIdMap, entry.getEntriesInNextLevel(),
+                mTrieMap.getNextLevelBitmapEntryIndex(newIdIt->second, nextLevelBitmapEntryIndex),
+                outNgramCount)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWordIds) const {
+    int levelIndex = mTrieMap.getRootBitmapEntryIndex();  // Start at the root level.
+    for (const int prevWordId : prevWordIds) {
+        const TrieMap::Result step = mTrieMap.get(prevWordId, levelIndex);
+        if (!step.mIsValid) {
+            return TrieMap::INVALID_INDEX;  // This context is not stored.
+        }
+        // Descend one level for each previous word id.
+        levelIndex = step.mNextLevelBitmapEntryIndex;
+    }
+    return levelIndex;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
new file mode 100644
index 0000000..543dc2c
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
+#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
+
+#include <cstdio>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+#include "third_party/android_prediction/utils/int_array_view.h"
+
+namespace latinime {
+
+/**
+ * Class representing language model.
+ *
+ * This class provides methods to get and store unigram/n-gram probability information and flags.
+ */
+class LanguageModelDictContent {
+ public:
+ LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer,
+ const bool hasHistoricalInfo)
+ : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {}
+
+ explicit LanguageModelDictContent(const bool hasHistoricalInfo)
+ : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {}
+
+ bool isNearSizeLimit() const {
+ return mTrieMap.isNearSizeLimit();
+ }
+
+ bool save(FILE *const file) const;  // Saves the trie map to file.
+
+ bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const LanguageModelDictContent *const originalContent,
+ int *const outNgramCount);
+
+ ProbabilityEntry getProbabilityEntry(const int wordId) const {
+ return getNgramProbabilityEntry(WordIdArrayView(), wordId);  // Unigram: empty context.
+ }
+
+ bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) {
+ return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry);  // Unigram: empty context.
+ }
+
+ ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds,
+ const int wordId) const;  // Default-constructed entry when nothing is found.
+
+ bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const ProbabilityEntry *const probabilityEntry);  // false if the context is not found.
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
+
+ TrieMap mTrieMap;
+ const bool mHasHistoricalInfo;  // Passed to ProbabilityEntry::encode()/decode().
+
+ bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
+ int *const outNgramCount);
+
+ int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;  // TrieMap::INVALID_INDEX if not found.
+};
+} // namespace latinime
+#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
new file mode 100644
index 0000000..68fc496
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROBABILITY_ENTRY_H
+#define LATINIME_PROBABILITY_ENTRY_H
+
+#include <climits>
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/historical_info.h"
+
+namespace latinime {
+
+class ProbabilityEntry {
+ public:
+    ProbabilityEntry(const ProbabilityEntry &probabilityEntry)
+            : mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability),
+              mHistoricalInfo(probabilityEntry.mHistoricalInfo) {}
+
+    // Dummy entry
+    ProbabilityEntry()
+            : mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {}
+
+    // Entry without historical information
+    ProbabilityEntry(const int flags, const int probability)
+            : mFlags(flags), mProbability(probability), mHistoricalInfo() {}
+
+    // Entry with historical information.
+    ProbabilityEntry(const int flags, const int probability,
+            const HistoricalInfo *const historicalInfo)
+            : mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {}
+
+    const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const {
+        return ProbabilityEntry(mFlags, probability, &mHistoricalInfo);
+    }
+
+    const ProbabilityEntry createEntryWithUpdatedHistoricalInfo(
+            const HistoricalInfo *const historicalInfo) const {
+        return ProbabilityEntry(mFlags, mProbability, historicalInfo);
+    }
+
+    bool hasHistoricalInfo() const { return mHistoricalInfo.isValid(); }
+    int getFlags() const { return mFlags; }
+    int getProbability() const { return mProbability; }
+    const HistoricalInfo *getHistoricalInfo() const { return &mHistoricalInfo; }
+
+    // Packs flags followed by either (timestamp, level, count) or probability into one integer.
+    // Each value is masked to its field size so that a negative value cannot sign-extend into
+    // the fields stored above it.
+    uint64_t encode(const bool hasHistoricalInfo) const {
+        uint64_t encodedEntry = appendField(0 /* encodedEntry */, mFlags,
+                Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE);
+        if (hasHistoricalInfo) {
+            encodedEntry = appendField(encodedEntry, mHistoricalInfo.getTimeStamp(),
+                    Ver4DictConstants::TIME_STAMP_FIELD_SIZE);
+            encodedEntry = appendField(encodedEntry, mHistoricalInfo.getLevel(),
+                    Ver4DictConstants::WORD_LEVEL_FIELD_SIZE);
+            encodedEntry = appendField(encodedEntry, mHistoricalInfo.getCount(),
+                    Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+        } else {
+            encodedEntry = appendField(encodedEntry, mProbability,
+                    Ver4DictConstants::PROBABILITY_SIZE);
+        }
+        return encodedEntry;
+    }
+
+    // Unpacks a value produced by encode() with the same hasHistoricalInfo setting.
+    static ProbabilityEntry decode(const uint64_t encodedEntry, const bool hasHistoricalInfo) {
+        if (hasHistoricalInfo) {
+            const int flags = readFromEncodedEntry(encodedEntry,
+                    Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE,
+                    Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+                            + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+                            + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+            const int timestamp = readFromEncodedEntry(encodedEntry,
+                    Ver4DictConstants::TIME_STAMP_FIELD_SIZE,
+                    Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+                            + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+            const int level = readFromEncodedEntry(encodedEntry,
+                    Ver4DictConstants::WORD_LEVEL_FIELD_SIZE,
+                    Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+            const int count = readFromEncodedEntry(encodedEntry,
+                    Ver4DictConstants::WORD_COUNT_FIELD_SIZE, 0 /* pos */);
+            const HistoricalInfo historicalInfo(timestamp, level, count);
+            return ProbabilityEntry(flags, NOT_A_PROBABILITY, &historicalInfo);
+        } else {
+            const int flags = readFromEncodedEntry(encodedEntry,
+                    Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE,
+                    Ver4DictConstants::PROBABILITY_SIZE);
+            const int probability = readFromEncodedEntry(encodedEntry,
+                    Ver4DictConstants::PROBABILITY_SIZE, 0 /* pos */);
+            return ProbabilityEntry(flags, probability);
+        }
+    }
+
+ private:
+    // Copy constructor is public to use this class as a type of return value.
+    DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
+
+    const int mFlags;
+    const int mProbability;
+    const HistoricalInfo mHistoricalInfo;
+
+    // Shifts encodedEntry left by size bytes and puts the low size bytes of value below it.
+    static uint64_t appendField(const uint64_t encodedEntry, const int value, const int size) {
+        const uint64_t fieldMask = (1ull << (size * CHAR_BIT)) - 1;
+        return (encodedEntry << (size * CHAR_BIT)) | (static_cast<uint64_t>(value) & fieldMask);
+    }
+
+    static int readFromEncodedEntry(const uint64_t encodedEntry, const int size, const int pos) {
+        return static_cast<int>(
+                (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
+    }
+};
+} // namespace latinime
+#endif /* LATINIME_PROBABILITY_ENTRY_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
new file mode 100644
index 0000000..b1cd05d
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+        int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
+        bool *const outhasNext, int *const shortcutEntryPos) const {  // Decodes one entry.
+    const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer();
+    if (*shortcutEntryPos < 0 || *shortcutEntryPos >= shortcutListBuffer->getTailPosition()) {
+        AKLOGE("Invalid shortcut entry position. shortcutEntryPos: %d, bufSize: %d",
+                *shortcutEntryPos, shortcutListBuffer->getTailPosition());
+        ASSERT(false);  // Out-of-range position: report an empty, final entry below.
+        if (outhasNext) {
+            *outhasNext = false;  // Ends the while (hasNext) loops of the list walkers.
+        }
+        if (outCodePointCount) {
+            *outCodePointCount = 0;
+        }
+        return;  // *shortcutEntryPos and *outProbability are left untouched.
+    }
+
+    const int shortcutFlags = shortcutListBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+    if (outProbability) {  // The flags field packs the probability and the has-next bit.
+        *outProbability = shortcutFlags & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
+    }
+    if (outhasNext) {
+        *outhasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
+    }
+    if (outCodePoint && outCodePointCount) {  // If either is null the target is not read.
+        shortcutListBuffer->readCodePointsAndAdvancePosition(
+                maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
+    }
+}
+
+int ShortcutDictContent::getShortcutListHeadPos(const int terminalId) const {
+    // Only terminals registered in the address lookup table own a shortcut list.
+    if (getAddressLookupTable()->contains(terminalId)) {
+        return getAddressLookupTable()->get(terminalId);
+    }
+    return NOT_A_DICT_POS;
+}
+
+bool ShortcutDictContent::runGC(
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const ShortcutDictContent *const originalShortcutDictContent) {
+    for (const auto &idPair : *terminalIdMap) {
+        const int originalTerminalId = idPair.first;  // Key used in the original content.
+        const int newTerminalId = idPair.second;  // Key used in this content.
+        const int sourceListPos =
+                originalShortcutDictContent->getShortcutListHeadPos(originalTerminalId);
+        if (sourceListPos == NOT_A_DICT_POS) {
+            continue;  // The original content has no shortcut list for this terminal.
+        }
+        const int destListPos = getContentBuffer()->getTailPosition();  // Append at the tail.
+        if (!copyShortcutListFromDictContent(sourceListPos, originalShortcutDictContent,
+                destListPos)) {
+            AKLOGE("Cannot copy shortcut list during GC. original pos: %d, pos: %d",
+                    sourceListPos, destListPos);
+            return false;
+        }
+        // Register the copied list in the lookup table under the new terminal id.
+        if (!getUpdatableAddressLookupTable()->set(newTerminalId, destListPos)) {
+            AKLOGE("Cannot set shortcut list position. terminal id: %d, pos: %d",
+                    newTerminalId, destListPos);
+            return false;
+        }
+    }
+    return true;
+}
+
+bool ShortcutDictContent::createNewShortcutList(const int terminalId) {
+    const int tailPos = getContentBuffer()->getTailPosition();  // The list will start here.
+    return getUpdatableAddressLookupTable()->set(terminalId, tailPos);
+}
+
+bool ShortcutDictContent::copyShortcutList(const int shortcutListPos, const int toPos) {
+    return copyShortcutListFromDictContent(shortcutListPos, this, toPos);  // Source is |this|.
+}
+
+bool ShortcutDictContent::copyShortcutListFromDictContent(const int shortcutListPos,
+        const ShortcutDictContent *const sourceShortcutDictContent, const int toPos) {
+    int srcPos = shortcutListPos;
+    int dstPos = toPos;
+    int entryCodePoints[MAX_WORD_LENGTH];
+    bool moreEntries = true;
+    do {  // Copy entry by entry; the has-next flag read from the source ends the loop.
+        int entryProbability = 0;
+        int entryLength = 0;
+        sourceShortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH,
+                entryCodePoints, &entryLength, &entryProbability, &moreEntries, &srcPos);
+        if (!writeShortcutEntryAndAdvancePosition(entryCodePoints, entryLength,
+                entryProbability, moreEntries, &dstPos)) {
+            AKLOGE("Cannot write shortcut entry to copy. pos: %d", dstPos);
+            return false;
+        }
+    } while (moreEntries);
+    return true;
+}
+
+bool ShortcutDictContent::setProbability(const int probability, const int shortcutEntryPos) {
+    // Rewrite only the flags field in place, keeping the has-next bit already stored there.
+    BufferWithExtendableBuffer *const buffer = getWritableContentBuffer();
+    const int flagsSize = Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE;
+    const int oldFlags = buffer->readUint(flagsSize, shortcutEntryPos);
+    const bool hasNext = oldFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
+    const int newFlags = createAndGetShortcutFlags(probability, hasNext);
+    return buffer->writeUint(newFlags, flagsSize, shortcutEntryPos);
+}
+
+bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint,
+        const int codePointCount, const int probability, const bool hasNext,
+        int *const shortcutEntryPos) {
+    const int flags = createAndGetShortcutFlags(probability, hasNext);
+    BufferWithExtendableBuffer *const buffer = getWritableContentBuffer();
+    if (!buffer->writeUintAndAdvancePosition(flags,  // The flags field comes first.
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos)) {
+        AKLOGE("Cannot write shortcut flags. flags; %x, pos: %d", flags, *shortcutEntryPos);
+        return false;
+    }
+    if (!buffer->writeCodePointsAndAdvancePosition(codePoint, codePointCount,  // Then target.
+            true /* writesTerminator */, shortcutEntryPos)) {
+        AKLOGE("Cannot write shortcut target code points. pos: %d", *shortcutEntryPos);
+        return false;
+    }
+    return true;
+}
+
+// Finds the shortcut entry in the list at shortcutListPos whose target equals the given
+// code points and returns its position, or NOT_A_DICT_POS when no entry matches.
+int ShortcutDictContent::findShortcutEntryAndGetPos(const int shortcutListPos,
+        const int *const targetCodePointsToFind, const int codePointCount) const {
+    int nextEntryPos = shortcutListPos;
+    int entryCodePoints[MAX_WORD_LENGTH];
+    bool moreEntries = true;
+    do {
+        // Remember where this entry starts before the read advances the position.
+        const int candidatePos = nextEntryPos;
+        int unusedProbability = 0;
+        int entryLength = 0;
+        getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, entryCodePoints, &entryLength,
+                &unusedProbability, &moreEntries, &nextEntryPos);
+        if (entryLength == codePointCount) {
+            // Same length: compare the two targets code point by code point.
+            int matchedCount = 0;
+            while (matchedCount < codePointCount
+                    && targetCodePointsToFind[matchedCount] == entryCodePoints[matchedCount]) {
+                ++matchedCount;
+            }
+            if (matchedCount == codePointCount) {
+                return candidatePos;
+            }
+        }
+    } while (moreEntries);
+    return NOT_A_DICT_POS;
+}
+
+int ShortcutDictContent::createAndGetShortcutFlags(const int probability,
+        const bool hasNext) const {
+    const int nextFlag = hasNext ? Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK : 0;
+    return nextFlag | (probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK);
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
new file mode 100644
index 0000000..9c4b202
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
+#define LATINIME_SHORTCUT_DICT_CONTENT_H
+
+#include <cstdint>
+#include <cstdio>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+class ShortcutDictContent : public SparseTableDictContent {
+ public:
+    ShortcutDictContent(uint8_t *const *buffers, const int *bufferSizes)
+            : SparseTableDictContent(buffers, bufferSizes,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+    ShortcutDictContent()
+            : SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+    void getShortcutEntry(const int maxCodePointCount, int *const outCodePoint,
+            int *const outCodePointCount, int *const outProbability, bool *const outhasNext,
+            const int shortcutEntryPos) const {  // const: this only reads the content.
+        int readingPos = shortcutEntryPos;  // Scratch copy; the caller's position is kept.
+        getShortcutEntryAndAdvancePosition(maxCodePointCount, outCodePoint,
+                outCodePointCount, outProbability, outhasNext, &readingPos);
+    }
+
+    void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+            int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
+            bool *const outhasNext, int *const shortcutEntryPos) const;
+
+    // Returns head position of shortcut list for a PtNode specified by terminalId.
+    int getShortcutListHeadPos(const int terminalId) const;
+
+    bool flushToFile(FILE *const file) const {  // Forwards to the base class flush().
+        return flush(file);
+    }
+
+    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            const ShortcutDictContent *const originalShortcutDictContent);
+
+    bool createNewShortcutList(const int terminalId);
+
+    bool copyShortcutList(const int shortcutListPos, const int toPos);
+
+    bool setProbability(const int probability, const int shortcutEntryPos);
+
+    bool writeShortcutEntry(const int *const codePoint, const int codePointCount,
+            const int probability, const bool hasNext, const int shortcutEntryPos) {
+        int writingPos = shortcutEntryPos;  // Scratch copy; the advanced position is dropped.
+        return writeShortcutEntryAndAdvancePosition(codePoint, codePointCount, probability,
+                hasNext, &writingPos);
+    }
+
+    bool writeShortcutEntryAndAdvancePosition(const int *const codePoint,
+            const int codePointCount, const int probability, const bool hasNext,
+            int *const shortcutEntryPos);
+
+    int findShortcutEntryAndGetPos(const int shortcutListPos,
+            const int *const targetCodePointsToFind, const int codePointCount) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
+
+    bool copyShortcutListFromDictContent(const int shortcutListPos,
+            const ShortcutDictContent *const sourceShortcutDictContent, const int toPos);
+
+    int createAndGetShortcutFlags(const int probability, const bool hasNext) const;
+};
+} // namespace latinime
+#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
new file mode 100644
index 0000000..8978c17
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SINGLE_DICT_CONTENT_H
+#define LATINIME_SINGLE_DICT_CONTENT_H
+
+#include <cstdint>
+#include <cstdio>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+
+namespace latinime {
+
+class SingleDictContent {  // Holds a single BufferWithExtendableBuffer for subclasses.
+ public:
+    SingleDictContent(uint8_t *const buffer, const int bufferSize)  // Wraps caller's bytes.
+            : mExpandableContentBuffer(ReadWriteByteArrayView(buffer, bufferSize),
+                    BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
+
+    SingleDictContent()  // Buffer is constructed from MAX_DICTIONARY_SIZE alone.
+            : mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE) {}
+
+    virtual ~SingleDictContent() {}
+
+    bool isNearSizeLimit() const {  // Forwards the buffer's own answer.
+        return mExpandableContentBuffer.isNearSizeLimit();
+    }
+
+ protected:
+    BufferWithExtendableBuffer *getWritableBuffer() {  // Mutable access for subclasses.
+        return &mExpandableContentBuffer;
+    }
+
+    const BufferWithExtendableBuffer *getBuffer() const {  // Read-only access for subclasses.
+        return &mExpandableContentBuffer;
+    }
+
+    bool flush(FILE *const file) const {  // Delegates to writeBufferToFileTail().
+        return DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableContentBuffer);
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
+
+    BufferWithExtendableBuffer mExpandableContentBuffer;
+};
+} // namespace latinime
+#endif /* LATINIME_SINGLE_DICT_CONTENT_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
new file mode 100644
index 0000000..f8b9a62
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+
+namespace latinime {
+
+const int SparseTableDictContent::LOOKUP_TABLE_BUFFER_INDEX = 0;  // Slot in buffers[].
+const int SparseTableDictContent::ADDRESS_TABLE_BUFFER_INDEX = 1;  // Slot in buffers[].
+const int SparseTableDictContent::CONTENT_BUFFER_INDEX = 2;  // Slot in buffers[].
+
+bool SparseTableDictContent::flush(FILE *const file) const {
+    const BufferWithExtendableBuffer *const buffersInFileOrder[] = {  // Written in this order.
+        &mExpandableLookupTableBuffer,
+        &mExpandableAddressTableBuffer,
+        &mExpandableContentBuffer};
+    for (const BufferWithExtendableBuffer *const buffer : buffersInFileOrder) {
+        if (!DictFileWritingUtils::writeBufferToFileTail(file, buffer)) {
+            return false;  // Stop at the first buffer that cannot be written.
+        }
+    }
+    return true;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
new file mode 100644
index 0000000..f87bdc3
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
+#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
+
+#include <cstdint>
+#include <cstdio>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/sparse_table.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+
+namespace latinime {
+
+// TODO: Support multiple contents.
+class SparseTableDictContent {  // Three expandable buffers plus a SparseTable over two.
+ public:
+    AK_FORCE_INLINE SparseTableDictContent(uint8_t *const *buffers, const int *bufferSizes,
+            const int sparseTableBlockSize, const int sparseTableDataSize)
+            : mExpandableLookupTableBuffer(  // Each buffer wraps buffers[] at its own index.
+                      ReadWriteByteArrayView(buffers[LOOKUP_TABLE_BUFFER_INDEX],
+                              bufferSizes[LOOKUP_TABLE_BUFFER_INDEX]),
+                      BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+              mExpandableAddressTableBuffer(
+                      ReadWriteByteArrayView(buffers[ADDRESS_TABLE_BUFFER_INDEX],
+                              bufferSizes[ADDRESS_TABLE_BUFFER_INDEX]),
+                      BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+              mExpandableContentBuffer(
+                      ReadWriteByteArrayView(buffers[CONTENT_BUFFER_INDEX],
+                              bufferSizes[CONTENT_BUFFER_INDEX]),
+                      BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+              mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+                      sparseTableBlockSize, sparseTableDataSize) {}
+
+    SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
+            : mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+              mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+              mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+              mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+                      sparseTableBlockSize, sparseTableDataSize) {}
+
+    virtual ~SparseTableDictContent() {}
+
+    bool isNearSizeLimit() const {  // True as soon as any one of the three buffers says so.
+        return mExpandableLookupTableBuffer.isNearSizeLimit()
+                || mExpandableAddressTableBuffer.isNearSizeLimit()
+                || mExpandableContentBuffer.isNearSizeLimit();
+    }
+
+ protected:
+    SparseTable *getUpdatableAddressLookupTable() {  // Mutable access for subclasses.
+        return &mAddressLookupTable;
+    }
+
+    const SparseTable *getAddressLookupTable() const {  // Read-only access for subclasses.
+        return &mAddressLookupTable;
+    }
+
+    BufferWithExtendableBuffer *getWritableContentBuffer() {  // Content buffer, mutable.
+        return &mExpandableContentBuffer;
+    }
+
+    const BufferWithExtendableBuffer *getContentBuffer() const {  // Content buffer, const.
+        return &mExpandableContentBuffer;
+    }
+
+    bool flush(FILE *const file) const;  // Defined out of line.
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
+
+    static const int LOOKUP_TABLE_BUFFER_INDEX;  // Indices into the constructor's buffers[].
+    static const int ADDRESS_TABLE_BUFFER_INDEX;
+    static const int CONTENT_BUFFER_INDEX;
+
+    BufferWithExtendableBuffer mExpandableLookupTableBuffer;
+    BufferWithExtendableBuffer mExpandableAddressTableBuffer;
+    BufferWithExtendableBuffer mExpandableContentBuffer;
+    SparseTable mAddressLookupTable;  // Holds pointers to the first two buffers above.
+};
+} // namespace latinime
+#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
new file mode 100644
index 0000000..282c906
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const {
+    const bool isInTable = (terminalId >= 0) && (terminalId < mSize);
+    if (!isInTable) {
+        return NOT_A_DICT_POS;
+    }
+    const int storedPos = getBuffer()->readUint(  // NOT_A_TERMINAL_ADDRESS marks "no PtNode".
+            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
+    return (storedPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) ? NOT_A_DICT_POS : storedPos;
+}
+
+bool TerminalPositionLookupTable::setTerminalPtNodePosition(
+        const int terminalId, const int terminalPtNodePos) {  // Grows the table if needed.
+    if (terminalId < 0) {
+        return false;  // Invalid id: report failure rather than a position sentinel.
+    }
+    while (terminalId >= mSize) {
+        // Append entries one at a time, each marked as having no PtNode, up to terminalId.
+        if (!getWritableBuffer()->writeUint(Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(mSize))) {
+            return false;
+        }
+        mSize++;
+    }
+    const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?  // Map to stored marker.
+            terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
+    return getWritableBuffer()->writeUint(terminalPos,
+            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
+}
+
+bool TerminalPositionLookupTable::flushToFile(FILE *const file) const {
+    const int usedSize = getEntryPos(mSize);
+    if (usedSize >= getBuffer()->getTailPosition()) {
+        // The buffer holds nothing beyond the mSize entries in use, so write it as is.
+        return flush(file);
+    }
+    // The buffer extends past the entries in use (runGCTerminalIds() lowers mSize without
+    // shrinking the buffer), so rebuild a table holding only the first mSize entries and
+    // write that one instead.
+    TerminalPositionLookupTable compactTable;
+    for (int id = 0; id < mSize; ++id) {
+        const int ptNodePos = getTerminalPtNodePosition(id);
+        if (!compactTable.setTerminalPtNodePosition(id, ptNodePos)) {
+            AKLOGE("Cannot set terminal position to lookupTableToWrite."
+                    " terminalId: %d, position: %d", id, ptNodePos);
+            return false;
+        }
+    }
+    return compactTable.flush(file);
+}
+
+bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminalIdMap) {
+    // Compacts the live entries towards the front of the table and records, for each of
+    // them, the mapping from its old terminal id to its new one in terminalIdMap.
+    int nextNewTerminalId = 0;
+    for (int i = 0; i < mSize; ++i) {
+        const int terminalPos = getBuffer()->readUint(
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(i));
+        if (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
+            // This entry is garbage: it gets no new id and is left out of terminalIdMap.
+            continue;
+        }
+        // Give a new terminal id to the entry; the new id never exceeds the old id i.
+        if (!getWritableBuffer()->writeUint(terminalPos,
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
+                getEntryPos(nextNewTerminalId))) {
+            return false;
+        }
+        // Memorize the mapping from the old terminal id to the new terminal id.
+        terminalIdMap->insert(TerminalIdMap::value_type(i, nextNewTerminalId));
+        nextNewTerminalId++;
+    }
+    mSize = nextNewTerminalId;
+    return true;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
new file mode 100644
index 0000000..bace51a
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
+#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
+
+#include <cstdint>
+#include <cstdio>
+#include <unordered_map>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+class TerminalPositionLookupTable : public SingleDictContent {
+ public:
+    typedef std::unordered_map<int, int> TerminalIdMap;  // Old terminal id -> new terminal id.
+
+    TerminalPositionLookupTable(uint8_t *const buffer, const int bufferSize)
+            : SingleDictContent(buffer, bufferSize),
+              mSize(getBuffer()->getTailPosition()  // Entry count derived from the buffer tail.
+                      / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
+
+    TerminalPositionLookupTable() : mSize(0) {}  // Starts with no entries.
+
+    int getTerminalPtNodePosition(const int terminalId) const;
+
+    bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos);
+
+    int getNextTerminalId() const {  // The first id past the entries currently in the table.
+        return mSize;
+    }
+
+    bool flushToFile(FILE *const file) const;
+
+    bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
+
+    int getEntryPos(const int terminalId) const {  // Buffer position of the id's entry.
+        return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+    }
+
+    int mSize;  // Number of entries in the table.
+};
+} // namespace latinime
+#endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
new file mode 100644
index 0000000..9886aca
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_SHORTCUT_LIST_POLICY_H
+#define LATINIME_VER4_SHORTCUT_LIST_POLICY_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+
+class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+    Ver4ShortcutListPolicy(ShortcutDictContent *const shortcutDictContent,
+            const TerminalPositionLookupTable *const terminalPositionLookupTable)
+            : mShortcutDictContent(shortcutDictContent) {}  // The lookup table is not used.
+
+    ~Ver4ShortcutListPolicy() {}
+
+    int getStartPos(const int pos) const {
+        // The first shortcut entry is located at the head position of the shortcut list.
+        return pos;
+    }
+
+    void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+            int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+            int *const pos) const {
+        int probability = 0;  // Stays 0 when the content rejects an invalid position.
+        mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
+                outCodePoint, outCodePointCount, &probability, outHasNext, pos);
+        if (outIsWhitelist) {
+            *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(probability);
+        }
+    }
+
+    void skipAllShortcuts(int *const pos) const {
+        // Do nothing because we don't need to skip shortcut lists in ver4 dictionaries.
+    }
+
+    bool addNewShortcut(const int terminalId, const int *const codePoints, const int codePointCount,
+            const int probability) {
+        const int shortcutListPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+        if (shortcutListPos == NOT_A_DICT_POS) {
+            // No list yet: create one and write the new shortcut as its only entry.
+            if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
+                AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
+                return false;
+            }
+            const int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+            return mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount, probability,
+                    false /* hasNext */, writingPos);
+        }
+        const int entryPos = mShortcutDictContent->findShortcutEntryAndGetPos(shortcutListPos,
+                codePoints, codePointCount);
+        if (entryPos == NOT_A_DICT_POS) {
+            // Target not in the list: start a new list with the new entry at its head, then
+            // copy the entries of the old list after it.
+            if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
+                AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
+                return false;
+            }
+            int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+            if (!mShortcutDictContent->writeShortcutEntryAndAdvancePosition(codePoints,
+                    codePointCount, probability, true /* hasNext */, &writingPos)) {
+                AKLOGE("Cannot write shortcut entry. terminal id: %d, pos: %d", terminalId,
+                        writingPos);
+                return false;
+            }
+            return mShortcutDictContent->copyShortcutList(shortcutListPos, writingPos);
+        }
+        // Overwrite existing entry; only its has-next flag is read so it can be preserved.
+        bool hasNext = false;
+        mShortcutDictContent->getShortcutEntry(MAX_WORD_LENGTH, nullptr /* outCodePoint */,
+                nullptr /* outCodePointCount */, nullptr /* probability */, &hasNext, entryPos);
+        if (!mShortcutDictContent->writeShortcutEntry(codePoints,
+                codePointCount, probability, hasNext, entryPos)) {
+            AKLOGE("Cannot overwrite shortcut entry. terminal id: %d, pos: %d", terminalId,
+                    entryPos);
+            return false;
+        }
+        return true;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
+
+    ShortcutDictContent *const mShortcutDictContent;
+};
+} // namespace latinime
+#endif // LATINIME_VER4_SHORTCUT_LIST_POLICY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
new file mode 100644
index 0000000..d62b04f
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+
+#include <cerrno>
+#include <cstring>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <vector>
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+
+namespace latinime {
+
+// Opens the body file for |headerBuffer| and splits it into content buffers; nullptr on failure.
+/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
+        const char *const dictPath, MmappedBuffer::MmappedBufferPtr &&headerBuffer,
+        const FormatUtils::FORMAT_VERSION formatVersion) {
+    if (!headerBuffer) {
+        ASSERT(false);
+        AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
+        return Ver4DictBuffersPtr(nullptr);
+    }
+    // TODO: take only dictDirPath, and open both header and trie files in the constructor below
+    MmappedBuffer::MmappedBufferPtr bodyBuffer = MmappedBuffer::openBuffer(dictPath,
+            Ver4DictConstants::BODY_FILE_EXTENSION, headerBuffer->isUpdatable());
+    if (!bodyBuffer) {
+        return Ver4DictBuffersPtr(nullptr);
+    }
+    std::vector<uint8_t *> buffers;
+    std::vector<int> bufferSizes;
+    const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
+    const int bodySize = static_cast<int>(buffer.size());
+    for (int position = 0; position < bodySize;) {  // Each record: a uint32 size, then its bytes.
+        const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
+                buffer.data(), &position);
+        if (bufferSize < 0 || bufferSize > bodySize - position) {  // Checked before any use.
+            AKLOGE("The dict body file is corrupted.");
+            return Ver4DictBuffersPtr(nullptr);
+        }
+        const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize);
+        buffers.push_back(subBuffer.data());
+        bufferSizes.push_back(subBuffer.size());
+        position += bufferSize;
+    }
+    if (buffers.size() != Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE) {
+        AKLOGE("The dict body file is corrupted.");
+        return Ver4DictBuffersPtr(nullptr);
+    }
+    return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
+            formatVersion, buffers, bufferSizes));
+}
+
+// Writes |headerBuffer| and the dict buffers into a temporary directory, then replaces
+// |dictDirPath| with it. The old dictionary is removed only after both files were written.
+bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
+        const BufferWithExtendableBuffer *const headerBuffer) const {
+    // Create temporary directory.
+    const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
+            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
+    char tmpDirPath[tmpDirPathBufSize];
+    FileUtils::getFilePathWithSuffix(dictDirPath,
+            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
+            tmpDirPath);
+    if (FileUtils::existsDir(tmpDirPath) && !FileUtils::removeDirAndFiles(tmpDirPath)) {
+        AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
+        ASSERT(false);
+        return false;
+    }
+    umask(S_IWGRP | S_IWOTH);
+    if (mkdir(tmpDirPath, S_IRWXU) == -1) {
+        AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
+        return false;
+    }
+    // Get dictionary base path.
+    const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
+    char dictName[dictNameBufSize];
+    FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
+    const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
+    char dictPath[dictPathBufSize];
+    FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
+
+    // Write header file.
+    if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+            Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
+        AKLOGE("Dictionary header file %s%s cannot be written.", dictPath,
+                Ver4DictConstants::HEADER_FILE_EXTENSION);
+        return false;
+    }
+
+    // Write body file.
+    const int bodyFilePathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictPath,
+            Ver4DictConstants::BODY_FILE_EXTENSION);
+    char bodyFilePath[bodyFilePathBufSize];
+    FileUtils::getFilePathWithSuffix(dictPath, Ver4DictConstants::BODY_FILE_EXTENSION,
+            bodyFilePathBufSize, bodyFilePath);
+
+    const int fd = open(bodyFilePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+    if (fd == -1) {
+        AKLOGE("File %s cannot be opened. errno: %d", bodyFilePath, errno);
+        ASSERT(false);
+        return false;
+    }
+    FILE *const file = fdopen(fd, "wb");
+    if (!file) {  // NOTE(review): |fd| stays open on this path; closing it needs <unistd.h>.
+        AKLOGE("fdopen failed for the file %s. errno: %d", bodyFilePath, errno);
+        ASSERT(false);
+        return false;
+    }
+    // fclose() flushes buffered data, so its failure also means the body file is incomplete.
+    const bool isBodyWritten = flushDictBuffers(file);
+    if (fclose(file) != 0 || !isBodyWritten) {
+        AKLOGE("Dictionary body file %s cannot be written.", bodyFilePath);
+        return false;
+    }
+    // Remove existing dictionary.
+    if (!FileUtils::removeDirAndFiles(dictDirPath)) {
+        AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
+        ASSERT(false);
+        return false;
+    }
+    // Rename temporary directory.
+    if (rename(tmpDirPath, dictDirPath) != 0) {
+        AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
+        ASSERT(false);
+        return false;
+    }
+    return true;
+}
+
+// Appends every content section of the body file to |file|. The sections are written in the
+// order of the buffer indices in Ver4DictConstants: trie, terminal position lookup table,
+// language model, bigram and shortcut. The first section that cannot be written is logged and
+// ends the attempt with false.
+bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
+    bool isEverySectionWritten = false;
+    // Each condition performs one write. Only the branch of the first failing write runs, and
+    // the remaining writes of the chain are then skipped.
+    if (!DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableTrieBuffer)) {
+        // Section read back at TRIE_BUFFER_INDEX.
+        AKLOGE("Trie cannot be written.");
+    } else if (!mTerminalPositionLookupTable.flushToFile(file)) {
+        // Section read back at TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX.
+        AKLOGE("Terminal position lookup table cannot be written.");
+    } else if (!mLanguageModelDictContent.save(file)) {
+        // Section read back at LANGUAGE_MODEL_BUFFER_INDEX.
+        AKLOGE("Language model dict content cannot be written.");
+    } else if (!mBigramDictContent.flushToFile(file)) {
+        // Sections read back from BIGRAM_BUFFERS_INDEX on.
+        AKLOGE("Bigram dict content cannot be written.");
+    } else if (!mShortcutDictContent.flushToFile(file)) {
+        // Sections read back from SHORTCUT_BUFFERS_INDEX on.
+        AKLOGE("Shortcut dict content cannot be written.");
+    } else {
+        isEverySectionWritten = true;
+    }
+    return isEverySectionWritten;
+}
+
+// Owns both mmapped buffers; later initializers read the members, not the moved-from parameters.
+Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
+        MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
+        const FormatUtils::FORMAT_VERSION formatVersion,
+        const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes)
+        : mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
+          mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
+          mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
+                  BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+          mExpandableTrieBuffer(
+                  ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
+                          contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]),
+                  BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+          mTerminalPositionLookupTable(
+                  contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
+                  contentBufferSizes[
+                          Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
+          mLanguageModelDictContent(ReadWriteByteArrayView(
+                          contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
+                          contentBufferSizes[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX]),
+                  mHeaderPolicy.hasHistoricalInfoOfWords()),
+          mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
+                  &contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
+                  mHeaderPolicy.hasHistoricalInfoOfWords()),
+          mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX],
+                  &contentBufferSizes[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
+          mIsUpdatable(mDictBuffer->isUpdatable()) {}  // Follows the mmapped body buffer.
+
+Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
+        : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
+          mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+          mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
+          mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()),
+          mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
+          mIsUpdatable(true) {}  // For createVer4DictBuffers(): no mmapped buffers, updatable.
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
new file mode 100644
index 0000000..721c9d4
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_DICT_BUFFER_H
+#define LATINIME_VER4_DICT_BUFFER_H
+
+#include <cstdio>
+#include <memory>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+// Owns all buffers of a version 4 dictionary: header, trie and the content tables. An instance
+// either wraps mmapped header and body files (openVer4DictBuffers) or has none of them.
+class Ver4DictBuffers {
+ public:
+    typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr;
+
+    // Maps the body file that belongs to |headerBuffer|. Returns nullptr on failure.
+    static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
+            MmappedBuffer::MmappedBufferPtr &&headerBuffer,
+            const FormatUtils::FORMAT_VERSION formatVersion);
+
+    // Creates buffers that are not backed by mmapped files.
+    static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
+            const HeaderPolicy *const headerPolicy, const int maxTrieSize) {
+        return Ver4DictBuffersPtr(new Ver4DictBuffers(headerPolicy, maxTrieSize));
+    }
+
+    // True only when both mmapped buffers exist and the header is valid, so instances made by
+    // createVer4DictBuffers() always report false.
+    AK_FORCE_INLINE bool isValid() const {
+        return mHeaderBuffer && mDictBuffer && mHeaderPolicy.isValid();
+    }
+
+    // True if any content buffer reports that it is near its size limit.
+    AK_FORCE_INLINE bool isNearSizeLimit() const {
+        return mExpandableTrieBuffer.isNearSizeLimit()
+                || mTerminalPositionLookupTable.isNearSizeLimit()
+                || mLanguageModelDictContent.isNearSizeLimit()
+                || mBigramDictContent.isNearSizeLimit()
+                || mShortcutDictContent.isNearSizeLimit();
+    }
+
+    // Accessors. Each returns the address of a member, so the result is never null.
+    AK_FORCE_INLINE const HeaderPolicy *getHeaderPolicy() const { return &mHeaderPolicy; }
+    AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
+        return &mExpandableHeaderBuffer;
+    }
+
+    AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() {
+        return &mExpandableTrieBuffer;
+    }
+    AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const {
+        return &mExpandableTrieBuffer;
+    }
+
+    AK_FORCE_INLINE TerminalPositionLookupTable *getMutableTerminalPositionLookupTable() {
+        return &mTerminalPositionLookupTable;
+    }
+    AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
+        return &mTerminalPositionLookupTable;
+    }
+
+    AK_FORCE_INLINE LanguageModelDictContent *getMutableLanguageModelDictContent() {
+        return &mLanguageModelDictContent;
+    }
+    AK_FORCE_INLINE const LanguageModelDictContent *getLanguageModelDictContent() const {
+        return &mLanguageModelDictContent;
+    }
+
+    AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
+        return &mBigramDictContent;
+    }
+    AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
+        return &mBigramDictContent;
+    }
+
+    AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
+        return &mShortcutDictContent;
+    }
+    AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
+        return &mShortcutDictContent;
+    }
+
+    AK_FORCE_INLINE bool isUpdatable() const { return mIsUpdatable; }
+
+    // Writes this dictionary, with its own header buffer, to the directory |dictDirPath|.
+    bool flush(const char *const dictDirPath) const {
+        return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer);
+    }
+
+    // Same as flush(), but writes |headerBuffer| as the header instead of the one owned here.
+    bool flushHeaderAndDictBuffers(const char *const dictDirPath,
+            const BufferWithExtendableBuffer *const headerBuffer) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
+
+    // |contentBuffers| and |contentBufferSizes| are indexed by Ver4DictConstants::*_INDEX.
+    Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
+            MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
+            const FormatUtils::FORMAT_VERSION formatVersion,
+            const std::vector<uint8_t *> &contentBuffers,
+            const std::vector<int> &contentBufferSizes);
+    Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
+    bool flushDictBuffers(FILE *const file) const;
+
+    // Declaration order matters: mHeaderPolicy is initialized from mHeaderBuffer.
+    const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
+    const MmappedBuffer::MmappedBufferPtr mDictBuffer;
+    const HeaderPolicy mHeaderPolicy;
+    BufferWithExtendableBuffer mExpandableHeaderBuffer;
+    BufferWithExtendableBuffer mExpandableTrieBuffer;
+    TerminalPositionLookupTable mTerminalPositionLookupTable;
+    LanguageModelDictContent mLanguageModelDictContent;
+    BigramDictContent mBigramDictContent;
+    ShortcutDictContent mShortcutDictContent;
+    const bool mIsUpdatable;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_DICT_BUFFER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
new file mode 100644
index 0000000..fa54d76
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+const char *const Ver4DictConstants::BODY_FILE_EXTENSION = ".body";
+const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
+
+// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
+const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
+// Extended region size, i.e. the size of the not yet GCed region in the dict file plus the
+// additional buffer size, is limited to 1MB to avoid inefficient traversal.
+const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
+
+// Defined before the constants derived from them so that those are constant-initialized too.
+const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1;
+const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3;
+const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 1;
+// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable.
+// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model.
+// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut.
+const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE =
+        NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2
+                + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT
+                + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2;
+const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0;
+const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX =
+        TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
+const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX =
+        TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
+const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX =
+        LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
+const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =
+        BIGRAM_BUFFERS_INDEX + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
+
+const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
+const int Ver4DictConstants::PROBABILITY_SIZE = 1;
+const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
+const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
+const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
+const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
+const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
+const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
+const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
+
+const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
+// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing
+// invalid terminal ID in bigram lists.
+const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
+        (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
+const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
+const int Ver4DictConstants::BIGRAM_IS_LINK_MASK = 0x80;
+const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
+const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
new file mode 100644
index 0000000..123b990
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_DICT_CONSTANTS_H
+#define LATINIME_VER4_DICT_CONSTANTS_H
+
+#include "third_party/android_prediction/defines.h"
+
+#include <cstddef>
+
+namespace latinime {
+
+// TODO: Create PtConstants under the pt_common and move some constant values there.
+// Note that there are corresponding definitions in FormatSpec.java.
+class Ver4DictConstants {
+ public:
+    static const char *const BODY_FILE_EXTENSION;  // Suffix of the body file name.
+    static const char *const HEADER_FILE_EXTENSION;  // Suffix of the header file name.
+    static const int MAX_DICTIONARY_SIZE;  // In bytes.
+    static const int MAX_DICT_EXTENDED_REGION_SIZE;  // In bytes.
+    // Number of content buffers in the body file, and the index of each content's first buffer.
+    static const size_t NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE;
+    static const int TRIE_BUFFER_INDEX;
+    static const int TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX;
+    static const int LANGUAGE_MODEL_BUFFER_INDEX;
+    static const int BIGRAM_BUFFERS_INDEX;
+    static const int SHORTCUT_BUFFERS_INDEX;
+    // Sentinel values and field sizes for terminals (sizes are presumably in bytes).
+    static const int NOT_A_TERMINAL_ID;
+    static const int PROBABILITY_SIZE;
+    static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
+    static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+    static const int NOT_A_TERMINAL_ADDRESS;
+    static const int TERMINAL_ID_FIELD_SIZE;
+    static const int TIME_STAMP_FIELD_SIZE;
+    static const int WORD_LEVEL_FIELD_SIZE;
+    static const int WORD_COUNT_FIELD_SIZE;
+    // Block and data sizes of the address tables of the bigram and shortcut contents.
+    static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
+    static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
+    static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
+    static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
+    // Fields and flag masks of bigram list entries.
+    static const int BIGRAM_FLAGS_FIELD_SIZE;
+    static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+    static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
+    static const int BIGRAM_IS_LINK_MASK;
+    static const int BIGRAM_PROBABILITY_MASK;
+    // Used when bigram list has time stamp.
+    static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
+    // Fields and flag masks of shortcut list entries.
+    static const int SHORTCUT_FLAGS_FIELD_SIZE;
+    static const int SHORTCUT_PROBABILITY_MASK;
+    static const int SHORTCUT_HAS_NEXT_MASK;
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
+    // Buffer counts per content type, used to derive the public buffer count and indices.
+    static const size_t NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
+    static const size_t NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
+    static const size_t NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_DICT_CONSTANTS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
new file mode 100644
index 0000000..69fc2cb
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+// Reads the PtNode at |ptNodePos|. If that node has been moved, the node at its destination is
+// read instead, and |siblingNodePos| carries the sibling position of the first node visited
+// through the redirection; callers start with NOT_A_DICT_POS. An out-of-range |ptNodePos| yields
+// default-constructed PtNodeParams.
+const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+        const int ptNodePos, const int siblingNodePos) const {
+    const int dictSize = mBuffer->getTailPosition();
+    if (ptNodePos < 0 || ptNodePos >= dictSize) {
+        // Reading invalid position because of bug or broken dictionary.
+        AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+                ptNodePos, dictSize);
+        ASSERT(false);
+        return PtNodeParams();
+    }
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
+    const uint8_t *const nodeBuffer = mBuffer->getBuffer(inAdditionalBuffer);
+    // Nodes in the additional buffer are read with positions local to that buffer, whereas every
+    // position handed back to the caller is a global one. This offset converts between the two.
+    const int globalOffset = inAdditionalBuffer ? mBuffer->getOriginalBufferSize() : 0;
+    int readingPos = ptNodePos - globalOffset;
+    // Fields are read in stored order: flags, parent position offset, code points, then the
+    // terminal id (terminal nodes only) and the children position.
+    const PatriciaTrieReadingUtils::NodeFlags flags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(nodeBuffer, &readingPos);
+    const int parentPosOffset =
+            DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+                    nodeBuffer, &readingPos);
+    const int parentPos =
+            DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, ptNodePos);
+    int codePoints[MAX_WORD_LENGTH];
+    const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+            nodeBuffer, flags, MAX_WORD_LENGTH, codePoints, &readingPos);
+    int terminalIdFieldPos = NOT_A_DICT_POS;
+    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    int probability = NOT_A_PROBABILITY;
+    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+        terminalIdFieldPos = readingPos + globalOffset;
+        terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
+                nodeBuffer, &readingPos);
+        // TODO: Quit reading probability here.
+        const ProbabilityEntry entry = mLanguageModelDictContent->getProbabilityEntry(terminalId);
+        if (!entry.hasHistoricalInfo()) {
+            probability = entry.getProbability();
+        } else {
+            // With historical info, the probability is decoded by ForgettingCurveUtils.
+            probability = ForgettingCurveUtils::decodeProbability(
+                    entry.getHistoricalInfo(), mHeaderPolicy);
+        }
+    }
+    const int childrenPosFieldPos = readingPos + globalOffset;
+    int childrenPos = DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
+            nodeBuffer, &readingPos);
+    if (childrenPos != NOT_A_DICT_POS) {
+        // Converted to a global position in the same way as the field positions above.
+        childrenPos += globalOffset;
+    }
+    // Sibling position is the tail position of original PtNode.
+    const int tailOfThisNode = readingPos + globalOffset;
+    const int newSiblingNodePos =
+            (siblingNodePos != NOT_A_DICT_POS) ? siblingNodePos : tailOfThisNode;
+    if (!DynamicPtReadingUtils::isMoved(flags)) {
+        return PtNodeParams(ptNodePos, flags, parentPos, codePointCount, codePoints,
+                terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
+                newSiblingNodePos);
+    }
+    // Read destination node if the read node is a moved node.
+    // The destination position is stored at the same place as the parent position.
+    return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
new file mode 100644
index 0000000..8e0e2a7
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class HeaderPolicy;
+class LanguageModelDictContent;
+
+/*
+ * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
+ * node and reads node attributes including probability from the language model.
+ */
+class Ver4PatriciaTrieNodeReader : public PtNodeReader {
+ public:
+    Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+            const LanguageModelDictContent *const languageModelDictContent,
+            const HeaderPolicy *const headerPolicy)
+            : mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent),
+              mHeaderPolicy(headerPolicy) {}  // Only the pointers are kept.
+
+    ~Ver4PatriciaTrieNodeReader() {}
+    // Reads the PtNode at |ptNodePos|; a moved node is resolved to its destination node.
+    virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const {
+        return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
+                NOT_A_DICT_POS /* siblingNodePos */);
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
+    // Not owned by this reader.
+    const BufferWithExtendableBuffer *const mBuffer;
+    const LanguageModelDictContent *const mLanguageModelDictContent;
+    const HeaderPolicy *const mHeaderPolicy;
+    // |siblingNodePos| is NOT_A_DICT_POS unless the call continues from a moved node.
+    const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+            const int siblingNodePos) const;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
new file mode 100644
index 0000000..364a4c9
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -0,0 +1,387 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
+
+#include "third_party/android_prediction/suggest/core/dictionary/property/unigram_property.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;  // for createAndGetFlags()
+
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
+        const PtNodeParams *const toBeUpdatedPtNodeParams) {
+    // Sets the "deleted" flag on the PtNode and, for a terminal, clears its entry in the
+    // terminal position lookup table. Returns false if a write fails.
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+    const bool inAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(headPos);
+    const uint8_t *const rawBuffer = mTrieBuffer->getBuffer(inAdditionalBuffer);
+    // Rebase the reading position when the PtNode lives in the additional buffer.
+    int flagReadingPos = inAdditionalBuffer
+            ? headPos - mTrieBuffer->getOriginalBufferSize() : headPos;
+    const PatriciaTrieReadingUtils::NodeFlags currentFlags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(rawBuffer, &flagReadingPos);
+    const PatriciaTrieReadingUtils::NodeFlags deletedFlags =
+            DynamicPtReadingUtils::updateAndGetFlags(currentFlags, false /* isMoved */,
+                    true /* isDeleted */, false /* willBecomeNonTerminal */);
+    int flagWritingPos = headPos;
+    const bool flagsWritten = DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
+            deletedFlags, &flagWritingPos);
+    if (!flagsWritten) {
+        return false;
+    }
+    if (!toBeUpdatedPtNodeParams->isTerminal()) {
+        // Nothing to remove from the lookup table for a non-terminal.
+        return true;
+    }
+    return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+            toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
+}
+
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
+        const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const int movedPos, const int bigramLinkedNodePos) {
+    // Flags the PtNode as moved, stores movedPos in its parent offset field, and rewrites the
+    // parent offset of every child PtNode to bigramLinkedNodePos. Returns false on write failure.
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+    const bool inAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(headPos);
+    const uint8_t *const rawBuffer = mTrieBuffer->getBuffer(inAdditionalBuffer);
+    // Rebase the reading position when the PtNode lives in the additional buffer.
+    int flagReadingPos = inAdditionalBuffer
+            ? headPos - mTrieBuffer->getOriginalBufferSize() : headPos;
+    const PatriciaTrieReadingUtils::NodeFlags currentFlags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(rawBuffer, &flagReadingPos);
+    const PatriciaTrieReadingUtils::NodeFlags movedFlags =
+            DynamicPtReadingUtils::updateAndGetFlags(currentFlags, true /* isMoved */,
+                    false /* isDeleted */, false /* willBecomeNonTerminal */);
+    int fieldWritingPos = headPos;
+    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, movedFlags,
+            &fieldWritingPos)) {
+        return false;
+    }
+    // The parent offset field of a moved PtNode holds the position it was moved to.
+    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+            mTrieBuffer, movedPos, headPos, &fieldWritingPos)) {
+        return false;
+    }
+    if (!toBeUpdatedPtNodeParams->hasChildren()) {
+        return true;
+    }
+    // Walk the child PtNode array and rewrite the parent offset of each child.
+    mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
+    while (!mReadingHelper.isEnd()) {
+        const PtNodeParams childParams(mReadingHelper.getPtNodeParams());
+        const int childHeadPos = childParams.getHeadPos();
+        int childParentFieldPos = childHeadPos + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
+        const bool written = DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+                mTrieBuffer, bigramLinkedNodePos, childHeadPos, &childParentFieldPos);
+        if (!written) {
+            // A failed write means a bug or a broken dictionary, so the update is abandoned.
+            return false;
+        }
+        mReadingHelper.readNextSiblingNode(childParams);
+    }
+    return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
+        const PtNodeParams *const toBeUpdatedPtNodeParams) {
+    // Clears the PtNode's entry in the terminal position lookup table, then sets the
+    // "will become non-terminal" flag on the PtNode itself. Returns false if either write fails.
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+    const bool inAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(headPos);
+    const uint8_t *const rawBuffer = mTrieBuffer->getBuffer(inAdditionalBuffer);
+    int flagReadingPos = inAdditionalBuffer
+            ? headPos - mTrieBuffer->getOriginalBufferSize() : headPos;
+    const PatriciaTrieReadingUtils::NodeFlags currentFlags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(rawBuffer, &flagReadingPos);
+    const PatriciaTrieReadingUtils::NodeFlags nonTerminalFlags =
+            DynamicPtReadingUtils::updateAndGetFlags(currentFlags, false /* isMoved */,
+                    false /* isDeleted */, true /* willBecomeNonTerminal */);
+    const int terminalId = toBeUpdatedPtNodeParams->getTerminalId();
+    const bool entryCleared = mBuffers->getMutableTerminalPositionLookupTable()
+            ->setTerminalPtNodePosition(terminalId, NOT_A_DICT_POS /* ptNodePos */);
+    if (!entryCleared) {
+        AKLOGE("Cannot update terminal position lookup table. terminal id: %d", terminalId);
+        return false;
+    }
+    int flagWritingPos = headPos;
+    return DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nonTerminalFlags,
+            &flagWritingPos);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
+        const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const UnigramProperty *const unigramProperty) {
+    // Rewrites the probability entry (probability and historical information) of a terminal
+    // PtNode from unigramProperty. Returns false for a non-terminal PtNode.
+    // TODO: Update other information in the unigram property.
+    if (toBeUpdatedPtNodeParams->isTerminal()) {
+        const int terminalId = toBeUpdatedPtNodeParams->getTerminalId();
+        const ProbabilityEntry oldEntry =
+                mBuffers->getLanguageModelDictContent()->getProbabilityEntry(terminalId);
+        const ProbabilityEntry newEntry = createUpdatedEntryFrom(&oldEntry, unigramProperty);
+        return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
+                terminalId, &newEntry);
+    }
+    return false;
+}
+
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+        const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode) {
+    // Saves refreshed historical info for a terminal PtNode and reports via outNeedsToKeepPtNode
+    // whether to keep it (always true without historical info). Returns false on any failure.
+    if (!toBeUpdatedPtNodeParams->isTerminal()) {
+        AKLOGE("updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC is called for "
+                "non-terminal PtNode.");
+        return false;
+    }
+    const int terminalId = toBeUpdatedPtNodeParams->getTerminalId();
+    const ProbabilityEntry originalProbabilityEntry =
+            mBuffers->getLanguageModelDictContent()->getProbabilityEntry(terminalId);
+    if (!originalProbabilityEntry.hasHistoricalInfo()) {
+        // No need to update probability.
+        *outNeedsToKeepPtNode = true;
+        return true;
+    }
+    const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+            originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
+    const ProbabilityEntry probabilityEntry =
+            originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
+    if (!mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
+            terminalId, &probabilityEntry)) {
+        AKLOGE("Cannot write updated probability entry. terminalId: %d", terminalId);
+        return false;
+    }
+    const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy);
+    if (!isValid && !markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
+        AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
+        return false;
+    }
+    *outNeedsToKeepPtNode = isValid;
+    return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
+        const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
+    int fieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();  // field to overwrite
+    return DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(
+            mTrieBuffer, newChildrenPosition, &fieldPos);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateTerminalId(
+        const PtNodeParams *const toBeUpdatedPtNodeParams, const int newTerminalId) {
+    const int pos = toBeUpdatedPtNodeParams->getTerminalIdFieldPos();  // terminal id field
+    return mTrieBuffer->writeUint(newTerminalId, Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, pos);
+}
+
+bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
+        const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
+    // Delegates with a null outTerminalId because this entry point does not report the id.
+    return writePtNodeAndGetTerminalIdAndAdvancePosition(
+            ptNodeParams, nullptr /* outTerminalId */, ptNodeWritingPos);
+}
+
+bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
+        const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
+        int *const ptNodeWritingPos) {
+    // Writes the PtNode, then stores a probability entry created from unigramProperty for it.
+    int writtenTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    const bool ptNodeWritten = writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams,
+            &writtenTerminalId, ptNodeWritingPos);
+    if (!ptNodeWritten) {
+        return false;
+    }
+    ProbabilityEntry defaultEntry;
+    const ProbabilityEntry entryToWrite = createUpdatedEntryFrom(&defaultEntry, unigramProperty);
+    return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
+            writtenTerminalId, &entryToWrite);
+}
+
+bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+        const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
+    const int prevId = prevWordIds[0];  // only the first previous word id is used
+    if (mBigramPolicy->addNewEntry(prevId, wordId, bigramProperty, outAddedNewBigram)) {
+        return true;
+    }
+    AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d", prevId, wordId);
+    return false;
+}
+
+bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(
+        const WordIdArrayView prevWordIds, const int wordId) {
+    return mBigramPolicy->removeEntry(prevWordIds[0], wordId);  // only the first prev word is used
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
+        const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
+    const int id = sourcePtNodeParams->getTerminalId();  // terminal id of the source PtNode
+    return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(id, outBigramEntryCount);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
+        const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const DictPositionRelocationMap *const dictPositionRelocationMap,
+        int *const outBigramEntryCount) {
+    // Rewrites the PtNode's parent offset and children position, remapping each position found in
+    // dictPositionRelocationMap, and optionally reports the PtNode's bigram entry count.
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+    const auto &ptNodeRelocations = dictPositionRelocationMap->mPtNodePositionRelocationMap;
+    const auto &ptNodeArrayRelocations =
+            dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap;
+    // Parent position: left unchanged unless the relocation map has an entry for it.
+    int parentPos = toBeUpdatedPtNodeParams->getParentPos();
+    if (parentPos != NOT_A_DICT_POS) {
+        const auto relocated = ptNodeRelocations.find(parentPos);
+        if (relocated != ptNodeRelocations.end()) {
+            parentPos = relocated->second;
+        }
+    }
+    int writingPos = headPos + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
+    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
+            parentPos, headPos, &writingPos)) {
+        return false;
+    }
+    // Children position: same treatment, using the PtNode array relocation map.
+    int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
+    if (childrenPos != NOT_A_DICT_POS) {
+        const auto relocated = ptNodeArrayRelocations.find(childrenPos);
+        if (relocated != ptNodeArrayRelocations.end()) {
+            childrenPos = relocated->second;
+        }
+    }
+    if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
+        return false;
+    }
+    if (outBigramEntryCount) {
+        *outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
+                toBeUpdatedPtNodeParams->getTerminalId());
+    }
+    return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptNodeParams,
+        const int *const targetCodePoints, const int targetCodePointCount,
+        const int shortcutProbability) {
+    const bool added = mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
+            targetCodePoints, targetCodePointCount, shortcutProbability);  // keyed by terminal id
+    if (!added) {
+        AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId());
+    }
+    return added;
+}
+
+bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
+        const PtNodeParams *const ptNodeParams, int *const outTerminalId,
+        int *const ptNodeWritingPos) {
+    // Writes a PtNode at *ptNodeWritingPos (flags, parent offset, code points, terminal id for
+    // terminals, children position), advancing the position past each field. outTerminalId may
+    // be null and is only set for terminals. Returns false as soon as any write fails.
+    const int nodePos = *ptNodeWritingPos;
+    // Write dummy flags; the real flags are written by updatePtNodeFlags() as the last step.
+    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
+            0 /* nodeFlags */, ptNodeWritingPos)) {
+        return false;
+    }
+    // Calculate a parent offset and write the offset.
+    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
+        return false;
+    }
+    // Write code points
+    if (!DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
+        return false;
+    }
+    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    if (!ptNodeParams->willBecomeNonTerminal()) {
+        if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
+            terminalId = ptNodeParams->getTerminalId();
+        } else if (ptNodeParams->isTerminal()) {
+            // A terminal without an id yet: take a new unused terminal id.
+            terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
+        }
+    }
+    const bool isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
+    if (isTerminal) {
+        // Record the PtNode position in the lookup table, then write the terminal id field.
+        if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+                terminalId, nodePos)) {
+            return false;
+        }
+        if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
+                Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
+            return false;
+        }
+        if (outTerminalId) {
+            *outTerminalId = terminalId;
+        }
+    }
+    // Write children position
+    if (!DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
+        return false;
+    }
+    return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
+            isTerminal, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+}
+
+const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
+        const ProbabilityEntry *const originalProbabilityEntry,
+        const UnigramProperty *const unigramProperty) const {
+    // When the header policy reports no historical info of words, only the probability is
+    // replaced; otherwise the historical info is updated through ForgettingCurveUtils.
+    // TODO: Consolidate historical info and probability.
+    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
+        return originalProbabilityEntry->createEntryWithUpdatedProbability(
+                unigramProperty->getProbability());
+    }
+    // Historical info built from the timestamp, level and count of the unigram property.
+    const HistoricalInfo incomingInfo(unigramProperty->getTimestamp(),
+            unigramProperty->getLevel(), unigramProperty->getCount());
+    const HistoricalInfo updatedInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
+            originalProbabilityEntry->getHistoricalInfo(), unigramProperty->getProbability(),
+            &incomingInfo, mHeaderPolicy);
+    return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(&updatedInfo);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
+        const bool isBlacklisted, const bool isNotAWord, const bool isTerminal,
+        const bool hasMultipleChars) {
+    // Writes a freshly built flag field at ptNodePos; shortcut and bigram flags are always false.
+    PatriciaTrieReadingUtils::NodeFlags flagsToWrite =
+            PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
+                    false /* hasShortcutTargets */, false /* hasBigrams */, hasMultipleChars,
+                    CHILDREN_POSITION_FIELD_SIZE);
+    if (DynamicPtWritingUtils::writeFlags(mTrieBuffer, flagsToWrite, ptNodePos)) {
+        return true;
+    }
+    AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", flagsToWrite, ptNodePos);
+    return false;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
new file mode 100644
index 0000000..07c9f4b
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
+#define LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class HeaderPolicy;
+class Ver4BigramListPolicy;
+class Ver4DictBuffers;
+class Ver4PatriciaTrieNodeReader;
+class Ver4PtNodeArrayReader;
+class Ver4ShortcutListPolicy;
+
+/*
+ * This class is used for helping to write nodes of ver4 patricia trie.
+ */
+class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
+ public:
+ Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
+ Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
+ const PtNodeReader *const ptNodeReader,
+ const PtNodeArrayReader *const ptNodeArrayReader,
+ Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
+ : mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
+ mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
+ mShortcutPolicy(shortcutPolicy) {}
+
+ virtual ~Ver4PatriciaTrieNodeWriter() {}
+ // Sets the deleted flag; for a terminal, also clears its terminal position lookup entry.
+ virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
+ // Flags the PtNode as moved to movedPos and re-parents its children to bigramLinkedNodePos.
+ virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int movedPos, const int bigramLinkedNodePos);
+ // Clears the terminal position lookup entry and sets the willBecomeNonTerminal flag.
+ virtual bool markPtNodeAsWillBecomeNonTerminal(
+ const PtNodeParams *const toBeUpdatedPtNodeParams);
+ // Updates the probability entry of a terminal PtNode; returns false for non-terminals.
+ virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const UnigramProperty *const unigramProperty);
+ // Saves refreshed historical info and reports whether the PtNode should be kept.
+ virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+ const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
+ // Overwrites the children position field of the PtNode.
+ virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newChildrenPosition);
+ // Overwrites the terminal id field of the PtNode. Not declared virtual here.
+ bool updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newTerminalId);
+ // Writes a PtNode without reporting the terminal id to the caller.
+ virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ int *const ptNodeWritingPos);
+ // Writes a PtNode and then a probability entry created from unigramProperty.
+ virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
+ // Adds an entry through the bigram policy; the implementation reads only prevWordIds[0].
+ virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
+ // Removes an entry through the bigram policy; the implementation reads only prevWordIds[0].
+ virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
+ // Forwards to the bigram policy using the terminal id of sourcePtNodeParams.
+ virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
+ const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
+ // Rewrites parent offset and children position, remapped through the relocation map.
+ virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const DictPositionRelocationMap *const dictPositionRelocationMap,
+ int *const outBigramEntryCount);
+ // Adds a shortcut target for the PtNode's terminal id through the shortcut policy.
+ virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams,
+ const int *const targetCodePoints, const int targetCodePointCount,
+ const int shortcutProbability);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
+ // Shared PtNode writer; outTerminalId may be null and is set only for terminals.
+ bool writePtNodeAndGetTerminalIdAndAdvancePosition(
+ const PtNodeParams *const ptNodeParams, int *const outTerminalId,
+ int *const ptNodeWritingPos);
+
+ // Create updated probability entry using given unigram property. In addition to the
+ // probability, this method updates historical information if needed.
+ // TODO: Update flags belonging to the unigram property.
+ const ProbabilityEntry createUpdatedEntryFrom(
+ const ProbabilityEntry *const originalProbabilityEntry,
+ const UnigramProperty *const unigramProperty) const;
+ // Builds the flag field from the given attributes and writes it at ptNodePos.
+ bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
+ const bool isTerminal, const bool hasMultipleChars);
+ // Defined out of line; passed to createAndGetFlags() by updatePtNodeFlags().
+ static const int CHILDREN_POSITION_FIELD_SIZE;
+ // Constructor arguments stored as-is; mReadingHelper is built from the two reader pointers.
+ BufferWithExtendableBuffer *const mTrieBuffer;
+ Ver4DictBuffers *const mBuffers;
+ const HeaderPolicy *const mHeaderPolicy;
+ DynamicPtReadingHelper mReadingHelper;
+ Ver4BigramListPolicy *const mBigramPolicy;
+ Ver4ShortcutListPolicy *const mShortcutPolicy;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
new file mode 100644
index 0000000..257d932
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -0,0 +1,551 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+
+#include <vector>
+
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_vector.h"
+#include "third_party/android_prediction/suggest/core/dictionary/ngram_listener.h"
+#include "third_party/android_prediction/suggest/core/dictionary/property/bigram_property.h"
+#include "third_party/android_prediction/suggest/core/dictionary/property/unigram_property.h"
+#include "third_party/android_prediction/suggest/core/dictionary/property/word_property.h"
+#include "third_party/android_prediction/suggest/core/session/prev_words_info.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/probability_utils.h"
+
+namespace latinime {
+
+// Note: corresponding definitions exist on the Java side in BinaryDictionaryTests and
+// BinaryDictionaryDecayingTests. addUnigramEntry() checks the buffer tail against the MIN limit.
+const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
+const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
+const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
+ Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+
+void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
+        DicNodeVector *const childDicNodes) const {
+    // Pushes one child DicNode into childDicNodes for every PtNode in the children array of
+    // dicNode that does not represent non-word information. Sets mIsCorrupted when the reading
+    // helper reports an error.
+    if (!dicNode->hasChildren()) {
+        return;
+    }
+    DynamicPtReadingHelper helper(&mNodeReader, &mPtNodeArrayReader);
+    helper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
+    while (!helper.isEnd()) {
+        const PtNodeParams params = helper.getPtNodeParams();
+        if (!params.isValid()) {
+            break;
+        }
+        // A decaying dictionary can hold a terminal PtNode whose probability is
+        // NOT_A_PROBABILITY; such a PtNode is not treated as a valid terminal DicNode.
+        const bool isLiveTerminal = params.isTerminal() && !params.isDeleted();
+        const bool isTerminal = (isLiveTerminal && mHeaderPolicy->isDecayingDict())
+                ? params.getProbability() != NOT_A_PROBABILITY : isLiveTerminal;
+        // Move on to the next sibling before deciding whether to emit this PtNode.
+        helper.readNextSiblingNode(params);
+        if (!params.representsNonWordInfo()) {
+            // PtNodes that represent non-word information are skipped.
+            const bool isBlacklistedOrNotAWord = params.isBlacklisted() || params.isNotAWord();
+            childDicNodes->pushLeavingChild(dicNode, params.getHeadPos(), params.getChildrenPos(),
+                    params.getProbability(), isTerminal, params.hasChildren(),
+                    isBlacklistedOrNotAWord, params.getCodePointCount(), params.getCodePoints());
+        }
+    }
+    // Remember a reading failure so that the dictionary is reported as corrupted.
+    if (helper.isError()) {
+        mIsCorrupted = true;
+        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
+    }
+}
+
+int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
+        const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+        int *const outUnigramProbability) const {
+    DynamicPtReadingHelper helper(&mNodeReader, &mPtNodeArrayReader);
+    helper.initWithPtNodePos(ptNodePos);  // start reading at the given PtNode
+    const int count = helper.getCodePointsAndProbabilityAndReturnCodePointCount(
+            maxCodePointCount, outCodePoints, outUnigramProbability);
+    if (helper.isError()) {
+        mIsCorrupted = true;  // remember the reading failure
+        AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
+    }
+    return count;
+}
+
+int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
+        const int length, const bool forceLowerCaseSearch) const {
+    // Searches from the root PtNode array; sets mIsCorrupted if the reading helper fails.
+    DynamicPtReadingHelper helper(&mNodeReader, &mPtNodeArrayReader);
+    helper.initWithPtNodeArrayPos(getRootPosition());
+    const int pos = helper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
+    if (helper.isError()) {
+        mIsCorrupted = true;
+        AKLOGE("Dictionary reading error in getTerminalPtNodePositionOfWord().");
+    }
+    return pos;
+}
+
+int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
+        const int bigramProbability) const {
+    // For a decaying dictionary both probabilities are encoded; ForgettingCurveUtils decodes them.
+    if (mHeaderPolicy->isDecayingDict()) {
+        return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
+    }
+    // Otherwise: no unigram -> no probability; no bigram -> backed-off unigram; else the bigram.
+    if (unigramProbability == NOT_A_PROBABILITY) {
+        return NOT_A_PROBABILITY;
+    }
+    if (bigramProbability == NOT_A_PROBABILITY) {
+        return ProbabilityUtils::backoff(unigramProbability);
+    }
+    return bigramProbability;
+}
+
+int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
+        const int ptNodePos) const {
+    // Unigram probability without previous words; otherwise the matching bigram of the first one.
+    if (ptNodePos == NOT_A_DICT_POS) {
+        return NOT_A_PROBABILITY;
+    }
+    const PtNodeParams params(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+    if (params.isDeleted() || params.isBlacklisted() || params.isNotAWord()) {
+        return NOT_A_PROBABILITY;
+    }
+    if (!prevWordsPtNodePos) {
+        return getProbability(params.getProbability(), NOT_A_PROBABILITY);
+    }
+    const int bigramsPos = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+    BinaryDictionaryBigramsIterator it(&mBigramPolicy, bigramsPos);
+    while (it.hasNext()) {
+        it.next();
+        if (it.getBigramPos() == ptNodePos && it.getProbability() != NOT_A_PROBABILITY) {
+            return getProbability(params.getProbability(), it.getProbability());
+        }
+    }
+    return NOT_A_PROBABILITY;
+}
+
+void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
+        NgramListener *const listener) const {
+    // Reports each bigram entry of prevWordsPtNodePos[0] to listener; no-op for a null array.
+    if (!prevWordsPtNodePos) {
+        return;
+    }
+    const int bigramsPos = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+    for (BinaryDictionaryBigramsIterator it(&mBigramPolicy, bigramsPos); it.hasNext(); ) {
+        it.next();
+        listener->onVisitEntry(it.getProbability(), it.getBigramPos());
+    }
+}
+
+int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+    // Shortcut list head for the PtNode's terminal id; NOT_A_DICT_POS if invalid or deleted.
+    if (ptNodePos != NOT_A_DICT_POS) {
+        const PtNodeParams params(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+        if (!params.isDeleted()) {
+            const int terminalId = params.getTerminalId();
+            return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(terminalId);
+        }
+    }
+    return NOT_A_DICT_POS;
+}
+
+int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+    // Bigram list head for the PtNode's terminal id; NOT_A_DICT_POS if invalid or deleted.
+    if (ptNodePos != NOT_A_DICT_POS) {
+        const PtNodeParams params(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+        if (!params.isDeleted()) {
+            const int terminalId = params.getTerminalId();
+            return mBuffers->getBigramDictContent()->getBigramListHeadPos(terminalId);
+        }
+    }
+    return NOT_A_DICT_POS;
+}
+
+// Adds |word| (|length| code points) to the dictionary as a unigram described by
+// |unigramProperty|, followed by every shortcut target the property carries.
+//
+// Returns false when:
+//  - the dictionary is not updatable or has reached the size at which dynamic
+//    operations are refused,
+//  - |length| is negative or exceeds MAX_WORD_LENGTH, or a shortcut target exceeds
+//    MAX_WORD_LENGTH,
+//  - there is nothing to add after the optional beginning-of-sentence marker is attached,
+//  - the unigram or one of its shortcut targets cannot be written.
+// mUnigramCount is incremented only for newly added entries that do not represent the
+// beginning of a sentence.
+bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
+        const UnigramProperty *const unigramProperty) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (length < 0) {
+        // A negative length would be converted to a huge size_t by the memmove() below.
+        AKLOGE("The word length is invalid, length: %d", length);
+        return false;
+    }
+    if (length > MAX_WORD_LENGTH) {
+        AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
+        return false;
+    }
+    for (const auto &shortcut : unigramProperty->getShortcuts()) {
+        if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
+            // size() yields a size_t; cast it so that it matches the %d conversion.
+            AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
+                    static_cast<int>(shortcut.getTargetCodePoints()->size()));
+            return false;
+        }
+    }
+    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
+    readingHelper.initWithPtNodeArrayPos(getRootPosition());
+    bool addedNewUnigram = false;
+    // Work on a local copy because the beginning-of-sentence marker may be attached to it.
+    int codePointsToAdd[MAX_WORD_LENGTH];
+    int codePointCountToAdd = length;
+    memmove(codePointsToAdd, word, sizeof(int) * length);
+    if (unigramProperty->representsBeginningOfSentence()) {
+        codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
+                codePointCountToAdd, MAX_WORD_LENGTH);
+    }
+    if (codePointCountToAdd <= 0) {
+        return false;
+    }
+    if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
+            unigramProperty, &addedNewUnigram)) {
+        if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
+            mUnigramCount++;
+        }
+        if (unigramProperty->getShortcuts().size() > 0) {
+            // Add shortcut target.
+            const int wordPos = getTerminalPtNodePositionOfWord(word, length,
+                    false /* forceLowerCaseSearch */);
+            if (wordPos == NOT_A_DICT_POS) {
+                AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
+                return false;
+            }
+            for (const auto &shortcut : unigramProperty->getShortcuts()) {
+                if (!mUpdatingHelper.addShortcutTarget(wordPos,
+                        shortcut.getTargetCodePoints()->data(),
+                        shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
+                    // size() yields a size_t; cast it so that it matches the %d conversion.
+                    AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
+                            "probability: %d", wordPos,
+                            static_cast<int>(shortcut.getTargetCodePoints()->size()),
+                            shortcut.getProbability());
+                    return false;
+                }
+            }
+        }
+        return true;
+    } else {
+        return false;
+    }
+}
+
+// Marks the terminal PtNode of |word| as deleted.
+// Returns false when the dictionary is not updatable, the word has no terminal PtNode, or
+// the PtNode cannot be marked as deleted. mUnigramCount is decremented unless the PtNode
+// represents non-word info.
+bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    const int targetPos = getTerminalPtNodePositionOfWord(word, length,
+            false /* forceLowerCaseSearch */);
+    if (targetPos == NOT_A_DICT_POS) {
+        return false;
+    }
+    const PtNodeParams targetParams =
+            mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(targetPos);
+    const bool markedAsDeleted = mNodeWriter.markPtNodeAsDeleted(&targetParams);
+    if (markedAsDeleted) {
+        if (!targetParams.representsNonWordInfo()) {
+            mUnigramCount--;
+        }
+        return true;
+    }
+    AKLOGE("Cannot remove unigram. ptNodePos: %d", targetPos);
+    return false;
+}
+
+// Adds an n-gram entry leading from the previous word(s) in |prevWordsInfo| to the target
+// word described by |bigramProperty|.
+//
+// When the first previous word has no terminal PtNode and it is the beginning-of-sentence
+// marker, a unigram entry for that marker is added first and the previous-word positions
+// are looked up again.
+//
+// Returns false when the dictionary is not updatable or too large, |prevWordsInfo| is
+// invalid, the target word is longer than MAX_WORD_LENGTH, the previous word or the target
+// word is not in the dictionary, or the entry cannot be written. mBigramCount is incremented
+// only when a new entry was added.
+bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+        const BigramProperty *const bigramProperty) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (!prevWordsInfo->isValid()) {
+        AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
+        return false;
+    }
+    if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
+        // size() yields a size_t; cast it so that it matches the %d conversion.
+        AKLOGE("The word is too long to insert the ngram to the dictionary. "
+                "length: %d", static_cast<int>(bigramProperty->getTargetCodePoints()->size()));
+        return false;
+    }
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+            false /* tryLowerCaseSearch */);
+    const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
+    // TODO: Support N-gram.
+    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
+        if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
+            const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
+            const UnigramProperty beginningOfSentenceUnigramProperty(
+                    true /* representsBeginningOfSentence */, true /* isNotAWord */,
+                    false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
+                    NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+            if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
+                    prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
+                    &beginningOfSentenceUnigramProperty)) {
+                AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
+                return false;
+            }
+            // Refresh Terminal PtNode positions.
+            prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+                    false /* tryLowerCaseSearch */);
+        } else {
+            return false;
+        }
+    }
+    const int word1Pos = getTerminalPtNodePositionOfWord(
+            bigramProperty->getTargetCodePoints()->data(),
+            bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
+    if (word1Pos == NOT_A_DICT_POS) {
+        return false;
+    }
+    bool addedNewEntry = false;
+    if (mUpdatingHelper.addNgramEntry(prevWordsPtNodePosView, word1Pos, bigramProperty,
+            &addedNewEntry)) {
+        if (addedNewEntry) {
+            mBigramCount++;
+        }
+        return true;
+    } else {
+        return false;
+    }
+}
+
+// Removes the n-gram entry leading from the previous word(s) in |prevWordsInfo| to |word|
+// (|length| code points).
+//
+// Returns false when the dictionary is not updatable or too large, |prevWordsInfo| is
+// invalid, |length| exceeds MAX_WORD_LENGTH, the previous word or |word| has no terminal
+// PtNode, or the entry cannot be removed. mBigramCount is decremented on success.
+bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+        const int *const word, const int length) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (!prevWordsInfo->isValid()) {
+        AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
+        return false;
+    }
+    if (length > MAX_WORD_LENGTH) {
+        AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
+        // Reject the request instead of continuing with an over-long word, matching the
+        // length validation performed by addUnigramEntry().
+        return false;
+    }
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+            false /* tryLowerCaseSearch */);
+    const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
+    // TODO: Support N-gram.
+    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
+        return false;
+    }
+    const int wordPos = getTerminalPtNodePositionOfWord(word, length,
+            false /* forceLowerCaseSearch */);
+    if (wordPos == NOT_A_DICT_POS) {
+        return false;
+    }
+    if (mUpdatingHelper.removeNgramEntry(prevWordsPtNodePosView, wordPos)) {
+        mBigramCount--;
+        return true;
+    } else {
+        return false;
+    }
+}
+
+// Writes the dictionary to |filePath| together with the current unigram and bigram counts.
+// Returns false for a non-updatable dictionary. A failed write also marks this policy as
+// corrupted.
+bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
+        return false;
+    }
+    const bool written =
+            mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount);
+    if (written) {
+        return true;
+    }
+    AKLOGE("Cannot flush the dictionary to file.");
+    mIsCorrupted = true;
+    return false;
+}
+
+// Writes the dictionary to |filePath| through the GC-enabled writer, starting from the root
+// position. Returns false for a non-updatable dictionary. A failed write also marks this
+// policy as corrupted.
+bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+        return false;
+    }
+    const bool written = mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath);
+    if (written) {
+        return true;
+    }
+    AKLOGE("Cannot flush the dictionary to file with GC.");
+    mIsCorrupted = true;
+    return false;
+}
+
+// Decides whether garbage collection should be run on this dictionary.
+// Always false for a non-updatable dictionary. Otherwise the checks are made in order:
+// buffers near their size limit, extended region too large, dictionary too large while
+// additional buffer space is in use, and finally (decaying dictionaries only) the decision
+// of ForgettingCurveUtils::needsToDecay().
+bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+        return false;
+    }
+    // Additional buffer size is near the limit.
+    if (mBuffers->isNearSizeLimit()) {
+        return true;
+    }
+    // Total extended region size of the trie exceeds the limit.
+    if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
+            > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
+        return true;
+    }
+    // Needs to reduce dictionary size.
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
+            && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
+        return true;
+    }
+    if (!mHeaderPolicy->isDecayingDict()) {
+        return false;
+    }
+    return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
+            mHeaderPolicy);
+}
+
+// Answers a textual property query by printing a decimal number into |outResult| (at most
+// |maxResultLength| bytes, as bounded by snprintf()).
+// Recognised queries are the unigram count, the bigram count and their maximum values.
+// |outResult| is left untouched when |query| matches none of them.
+void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
+        char *const outResult, const int maxResultLength) {
+    const int compareLength = queryLength + 1 /* terminator */;
+    const auto queryIs = [query, compareLength](const char *const name) {
+        return strncmp(query, name, compareLength) == 0;
+    };
+    if (queryIs(UNIGRAM_COUNT_QUERY)) {
+        snprintf(outResult, maxResultLength, "%d", mUnigramCount);
+        return;
+    }
+    if (queryIs(BIGRAM_COUNT_QUERY)) {
+        snprintf(outResult, maxResultLength, "%d", mBigramCount);
+        return;
+    }
+    if (queryIs(MAX_UNIGRAM_COUNT_QUERY)) {
+        // Decaying dictionaries use the forgetting-curve hard limit; others use the
+        // maximum dictionary size constant.
+        snprintf(outResult, maxResultLength, "%d",
+                mHeaderPolicy->isDecayingDict() ?
+                        ForgettingCurveUtils::getUnigramCountHardLimit(
+                                mHeaderPolicy->getMaxUnigramCount()) :
+                        static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
+        return;
+    }
+    if (queryIs(MAX_BIGRAM_COUNT_QUERY)) {
+        snprintf(outResult, maxResultLength, "%d",
+                mHeaderPolicy->isDecayingDict() ?
+                        ForgettingCurveUtils::getBigramCountHardLimit(
+                                mHeaderPolicy->getMaxBigramCount()) :
+                        static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
+    }
+}
+
+// Collects everything stored for a word into a WordProperty: its code points, its unigram
+// attributes (including historical info), its bigram entries and its shortcut targets.
+// Logs an error and returns a default-constructed WordProperty when the word has no
+// terminal PtNode.
+const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
+        const int codePointCount) const {
+    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
+            false /* forceLowerCaseSearch */);
+    if (ptNodePos == NOT_A_DICT_POS) {
+        AKLOGE("getWordProperty is called for invalid word.");
+        return WordProperty();
+    }
+    const PtNodeParams nodeParams =
+            mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+    std::vector<int> wordCodePoints(nodeParams.getCodePoints(),
+            nodeParams.getCodePoints() + nodeParams.getCodePointCount());
+    const ProbabilityEntry unigramEntry =
+            mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
+                    nodeParams.getTerminalId());
+    const HistoricalInfo *const unigramHistory = unigramEntry.getHistoricalInfo();
+
+    // Fetch bigram information.
+    std::vector<BigramProperty> bigrams;
+    const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
+    if (bigramListPos != NOT_A_DICT_POS) {
+        const BigramDictContent *const bigramContent = mBuffers->getBigramDictContent();
+        const TerminalPositionLookupTable *const lookupTable =
+                mBuffers->getTerminalPositionLookupTable();
+        int targetCodePoints[MAX_WORD_LENGTH];
+        int readingPos = bigramListPos;
+        bool moreEntries = true;
+        do {
+            const BigramEntry entry =
+                    bigramContent->getBigramEntryAndAdvancePosition(&readingPos);
+            moreEntries = entry.hasNext();
+            const int targetTerminalId = entry.getTargetTerminalId();
+            const int targetPtNodePos =
+                    lookupTable->getTerminalPtNodePosition(targetTerminalId);
+            // Entries whose target has no terminal PtNode position are skipped.
+            if (targetPtNodePos != NOT_A_DICT_POS) {
+                // Word (unigram) probability; required as an out parameter only.
+                int targetUnigramProbability = NOT_A_PROBABILITY;
+                const int targetLength = getCodePointsAndProbabilityAndReturnCodePointCount(
+                        targetPtNodePos, MAX_WORD_LENGTH, targetCodePoints,
+                        &targetUnigramProbability);
+                const std::vector<int> target(targetCodePoints,
+                        targetCodePoints + targetLength);
+                const HistoricalInfo *const entryHistory = entry.getHistoricalInfo();
+                const int bigramProbability = entry.hasHistoricalInfo() ?
+                        ForgettingCurveUtils::decodeProbability(
+                                entry.getHistoricalInfo(), mHeaderPolicy) :
+                        entry.getProbability();
+                bigrams.emplace_back(&target, bigramProbability,
+                        entryHistory->getTimeStamp(), entryHistory->getLevel(),
+                        entryHistory->getCount());
+            }
+        } while (moreEntries);
+    }
+
+    // Fetch shortcut information.
+    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
+    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
+    if (shortcutPos != NOT_A_DICT_POS) {
+        const ShortcutDictContent *const shortcutContent =
+                mBuffers->getShortcutDictContent();
+        int targetBuffer[MAX_WORD_LENGTH];
+        bool moreShortcuts = true;
+        do {
+            int targetLength = 0;
+            int targetProbability = NOT_A_PROBABILITY;
+            shortcutContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, targetBuffer,
+                    &targetLength, &targetProbability, &moreShortcuts, &shortcutPos);
+            const std::vector<int> target(targetBuffer, targetBuffer + targetLength);
+            shortcuts.emplace_back(&target, targetProbability);
+        } while (moreShortcuts);
+    }
+
+    const UnigramProperty unigramProperty(nodeParams.representsBeginningOfSentence(),
+            nodeParams.isNotAWord(), nodeParams.isBlacklisted(), nodeParams.getProbability(),
+            unigramHistory->getTimeStamp(), unigramHistory->getLevel(),
+            unigramHistory->getCount(), &shortcuts);
+    return WordProperty(&wordCodePoints, &unigramProperty, &bigrams);
+}
+
+// Token-driven iteration over all words of the dictionary.
+// A |token| of 0 (re)collects the terminal PtNode positions of the whole trie. The word at
+// index |token| is then written to |outCodePoints| and its length to |outCodePointCount|.
+// Returns the token to pass to the next call, or 0 when |token| is out of range or the last
+// word has just been returned (the collected positions are cleared in the latter case).
+int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+        int *const outCodePointCount) {
+    *outCodePointCount = 0;
+    std::vector<int> &terminalPositions = mTerminalPtNodePositionsForIteratingWords;
+    if (token == 0) {
+        terminalPositions.clear();
+        DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions collector(
+                &terminalPositions);
+        DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
+        readingHelper.initWithPtNodeArrayPos(getRootPosition());
+        readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&collector);
+    }
+    const int positionCount = static_cast<int>(terminalPositions.size());
+    const bool tokenInRange = (token >= 0) && (token < positionCount);
+    if (!tokenInRange) {
+        AKLOGE("Given token %d is invalid.", token);
+        return 0;
+    }
+    // The unigram probability is required as an out parameter only.
+    int unigramProbability = NOT_A_PROBABILITY;
+    *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+            terminalPositions[token], MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
+    if (token + 1 < positionCount) {
+        return token + 1;
+    }
+    // All words have been iterated.
+    terminalPositions.clear();
+    return 0;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
new file mode 100644
index 0000000..e0b84b7
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+
+// Dictionary structure policy for version 4 dictionaries, implemented on top of the
+// buffers held by a Ver4DictBuffers instance. Offers lookups (PtNode positions,
+// probabilities, shortcuts, bigrams) as well as dynamic updates (adding and removing
+// unigram / n-gram entries) and flushing to a file.
+class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
+ public:
+ // Takes ownership of |buffers| (moved into mBuffers) and wires the readers, writers and
+ // helpers to them. The unigram and bigram counts start from the values in the header.
+ // Several initializers read members initialized before them, so they rely on the member
+ // declaration order at the bottom of this class.
+ Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
+ : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
+ mDictBuffer(mBuffers->getWritableTrieBuffer()),
+ mBigramPolicy(mBuffers->getMutableBigramDictContent(),
+ mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
+ mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
+ mBuffers->getTerminalPositionLookupTable()),
+ mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
+ mPtNodeArrayReader(mDictBuffer),
+ mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
+ &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
+ mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
+ mWritingHelper(mBuffers.get()),
+ mUnigramCount(mHeaderPolicy->getUnigramCount()),
+ mBigramCount(mHeaderPolicy->getBigramCount()),
+ mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
+
+ // The root position is always 0 for this policy.
+ AK_FORCE_INLINE int getRootPosition() const {
+ return 0;
+ }
+
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const;
+
+ int getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const;
+
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const;
+
+ int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+ int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
+
+ // Reports every bigram entry of the PtNode at prevWordsPtNodePos[0] to |listener|.
+ void iterateNgramEntries(const int *const prevWordsPtNodePos,
+ NgramListener *const listener) const;
+
+ // Returns NOT_A_DICT_POS for an invalid position or a deleted PtNode.
+ int getShortcutPositionOfPtNode(const int ptNodePos) const;
+
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return mHeaderPolicy;
+ }
+
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return &mShortcutPolicy;
+ }
+
+ // The four update operations below return false without modifying anything when the
+ // dictionary is not updatable.
+ bool addUnigramEntry(const int *const word, const int length,
+ const UnigramProperty *const unigramProperty);
+
+ bool removeUnigramEntry(const int *const word, const int length);
+
+ bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const BigramProperty *const bigramProperty);
+
+ bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1,
+ const int length1);
+
+ // Both flush variants mark the policy as corrupted (see isCorrupted()) when writing fails.
+ bool flush(const char *const filePath);
+
+ bool flushWithGC(const char *const filePath);
+
+ bool needsToRunGC(const bool mindsBlockByGC) const;
+
+ // Prints the answer to one of the *_QUERY strings declared below into |outResult|.
+ void getProperty(const char *const query, const int queryLength, char *const outResult,
+ const int maxResultLength);
+
+ const WordProperty getWordProperty(const int *const codePoints,
+ const int codePointCount) const;
+
+ // Iterates over all words; pass 0 to start and the returned value to continue. A return
+ // value of 0 ends the iteration.
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
+
+ bool isCorrupted() const {
+ return mIsCorrupted;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
+
+ // Query strings understood by getProperty().
+ static const char *const UNIGRAM_COUNT_QUERY;
+ static const char *const BIGRAM_COUNT_QUERY;
+ static const char *const MAX_UNIGRAM_COUNT_QUERY;
+ static const char *const MAX_BIGRAM_COUNT_QUERY;
+ // When the dictionary size is near the maximum size, we have to refuse dynamic operations to
+ // prevent the dictionary from overflowing.
+ static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+ static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+
+ // NOTE: the constructor's initializers depend on this declaration order (for example
+ // mHeaderPolicy and mDictBuffer are obtained from mBuffers). Do not reorder.
+ const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
+ const HeaderPolicy *const mHeaderPolicy;
+ BufferWithExtendableBuffer *const mDictBuffer;
+ Ver4BigramListPolicy mBigramPolicy;
+ Ver4ShortcutListPolicy mShortcutPolicy;
+ Ver4PatriciaTrieNodeReader mNodeReader;
+ Ver4PtNodeArrayReader mPtNodeArrayReader;
+ Ver4PatriciaTrieNodeWriter mNodeWriter;
+ DynamicPtUpdatingHelper mUpdatingHelper;
+ Ver4PatriciaTrieWritingHelper mWritingHelper;
+ // Running entry counts, initialized from the header and adjusted by the update operations.
+ int mUnigramCount;
+ int mBigramCount;
+ // Terminal PtNode positions collected by getNextWordAndNextToken() for word iteration.
+ std::vector<int> mTerminalPtNodePositionsForIteratingWords;
+ // Set to true when flush() or flushWithGC() fails to write the dictionary file.
+ mutable bool mIsCorrupted;
+
+ // Returns NOT_A_DICT_POS for an invalid position or a deleted PtNode.
+ int getBigramsPositionOfPtNode(const int ptNodePos) const;
+};
+} // namespace latinime
+#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
new file mode 100644
index 0000000..45f9eb8
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+// Reads a terminal id from |buffer| at |*pos| as an unsigned 32-bit value through
+// ByteArrayUtils and returns it converted to int. |pos| is handed to the reader, which is
+// expected to advance it.
+/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
+        const uint8_t *const buffer, int *pos) {
+    const int terminalId = ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
+    return terminalId;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
new file mode 100644
index 0000000..2b943ae
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+// Static-only reading helper for version 4 patricia trie buffers; instances are not meant to
+// be created (see DISALLOW_IMPLICIT_CONSTRUCTORS below).
+class Ver4PatriciaTrieReadingUtils {
+ public:
+ // Reads a terminal id from |buffer| at |*pos| as an unsigned 32-bit value and returns it as
+ // an int. |pos| is passed on to ByteArrayUtils::readUint32AndAdvancePosition(), which
+ // presumably moves it past the value that was read.
+ static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
new file mode 100644
index 0000000..8dc3c81
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
+
+#include <cstring>
+#include <queue>
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+// Builds a header (last-decayed time left untouched) and flushes it with mBuffers to dictDirPath.
+bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
+        const int unigramCount, const int bigramCount) const {
+    const HeaderPolicy *const policy = mBuffers->getHeaderPolicy();
+    BufferWithExtendableBuffer header(
+            BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+    const int extendedRegionSize = policy->getExtendedRegionSize()
+            + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
+    if (policy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */, unigramCount,
+            bigramCount, extendedRegionSize, &header)) {
+        return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &header);
+    }
+    AKLOGE("Cannot write header structure to buffer. "
+            "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
+            "extendedRegionSize: %d", false, unigramCount, bigramCount, extendedRegionSize);
+    return false;
+}
+
+// Runs GC into newly created buffers and, on success, flushes them with a fresh header.
+bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
+        const char *const dictDirPath) {
+    const HeaderPolicy *const policy = mBuffers->getHeaderPolicy();
+    // Destination buffers that runGC() fills.
+    Ver4DictBuffers::Ver4DictBuffersPtr gcBuffers(Ver4DictBuffers::createVer4DictBuffers(
+            policy, Ver4DictConstants::MAX_DICTIONARY_SIZE));
+    int unigrams = 0;
+    int bigrams = 0;
+    if (!runGC(rootPtNodeArrayPos, policy, gcBuffers.get(), &unigrams, &bigrams)) {
+        return false;
+    }
+    BufferWithExtendableBuffer header(
+            BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+    // The header is written with extendedRegionSize 0 and an updated last-decayed time.
+    return policy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */, unigrams,
+                    bigrams, 0 /* extendedRegionSize */, &header)
+            && gcBuffers->flushHeaderAndDictBuffers(dictDirPath, &header);
+}
+
+bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
+ const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
+ int *const outUnigramCount, int *const outBigramCount) {  // returns false at the first failing step
+ Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
+ mBuffers->getLanguageModelDictContent(), headerPolicy);
+ Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
+ Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
+ mBuffers->getTerminalPositionLookupTable(), headerPolicy);
+ Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
+ mBuffers->getTerminalPositionLookupTable());
+ Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
+ mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
+ &shortcutPolicy);  // every reader/policy/writer above operates on the original mBuffers
+ // Step 1: postorder walk of the original trie with the unigram / "useless PtNode" listener.
+ DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPtGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ &ptNodeWriter);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
+ return false;
+ }
+ const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ .getValidUnigramCount();
+ const int maxUnigramCount = headerPolicy->getMaxUnigramCount();
+ if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) {  // decaying dicts only
+ if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) {
+ AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount,
+ maxUnigramCount);
+ return false;
+ }
+ }
+ // Step 2: second postorder walk of the original trie, this time with the bigram listener.
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
+ traversePolicyToUpdateBigramProbability(&ptNodeWriter);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateBigramProbability)) {
+ return false;
+ }
+ const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount();
+ const int maxBigramCount = headerPolicy->getMaxBigramCount();
+ if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) {  // decaying dicts only
+ if (!truncateBigrams(maxBigramCount)) {
+ AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount);
+ return false;
+ }
+ }
+ // Step 3: preorder walk that writes into buffersToWrite via the "place and write" listener.
+ // Mapping from positions in mBuffers to positions in buffersToWrite.
+ PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
+ buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
+ &shortcutPolicy);  // targets buffersToWrite but is given the readers/policies of mBuffers
+ DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
+ buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
+ if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
+ return false;
+ }
+
+ // Create policy instances for the GCed dictionary.
+ Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
+ buffersToWrite->getLanguageModelDictContent(), headerPolicy);
+ Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
+ Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
+ buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
+ Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
+ buffersToWrite->getTerminalPositionLookupTable());
+ Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
+ buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
+ &newShortcutPolicy);
+ // Re-assign terminal IDs for valid terminal PtNodes.
+ TerminalPositionLookupTable::TerminalIdMap terminalIdMap;  // filled by runGCTerminalIds() below
+ if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
+ &terminalIdMap)) {
+ return false;
+ }
+ // Run GC for probability dict content.
+ if (!buffersToWrite->getMutableLanguageModelDictContent()->runGC(&terminalIdMap,
+ mBuffers->getLanguageModelDictContent(), nullptr /* outNgramCount */)) {
+ return false;
+ }
+ // Run GC for bigram dict content.
+ if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
+ mBuffers->getBigramDictContent(), outBigramCount)) {
+ return false;
+ }
+ // Run GC for shortcut dict content.
+ if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
+ mBuffers->getShortcutDictContent())) {
+ return false;
+ }
+ DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader);
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);  // NOTE(review): same root pos reused for new buffers - confirm
+ DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToUpdateAllPositionFields)) {
+ return false;
+ }
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+ traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
+ return false;
+ }
+ *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();  // outBigramCount was handed to the bigram runGC() above
+ return true;
+}
+
+// Pops queued terminals in DictProbabilityComparator order until maxUnigramCount remain queued.
+bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
+        const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+        Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
+    const TerminalPositionLookupTable *const lookupTable =
+            mBuffers->getTerminalPositionLookupTable();
+    const int terminalIdEnd = lookupTable->getNextTerminalId();
+    std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
+            candidates;
+    // Queue every terminal id that still maps to a PtNode position.
+    for (int terminalId = 0; terminalId < terminalIdEnd; ++terminalId) {
+        const int ptNodePos = lookupTable->getTerminalPtNodePosition(terminalId);
+        if (ptNodePos != NOT_A_DICT_POS) {
+            const ProbabilityEntry entry =
+                    mBuffers->getLanguageModelDictContent()->getProbabilityEntry(terminalId);
+            int probability;
+            if (entry.hasHistoricalInfo()) {
+                probability = ForgettingCurveUtils::decodeProbability(
+                        entry.getHistoricalInfo(), mBuffers->getHeaderPolicy());
+            } else {
+                probability = entry.getProbability();
+            }
+            candidates.push(DictProbability(ptNodePos, probability,
+                    entry.getHistoricalInfo()->getTimeStamp()));
+        }
+    }
+    while (static_cast<int>(candidates.size()) > maxUnigramCount) {
+        const int victimPos = candidates.top().getDictPos();
+        candidates.pop();
+        const PtNodeParams victim =
+                ptNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(victimPos);
+        if (!victim.representsNonWordInfo()
+                && !ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&victim)) {
+            AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", victimPos);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Invalidates queued bigram entries in comparator pop order until maxBigramCount remain queued.
+bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
+    const TerminalPositionLookupTable *const lookupTable =
+            mBuffers->getTerminalPositionLookupTable();
+    const int terminalIdEnd = lookupTable->getNextTerminalId();
+    std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
+            candidates;
+    BigramDictContent *const bigrams = mBuffers->getMutableBigramDictContent();
+    for (int terminalId = 0; terminalId < terminalIdEnd; ++terminalId) {
+        int readingPos = bigrams->getBigramListHeadPos(terminalId);
+        if (readingPos == NOT_A_DICT_POS) {
+            continue;
+        }
+        // entryPos is derived by stepping back one entry size from the advanced readingPos.
+        bool more;
+        do {
+            const BigramEntry entry = bigrams->getBigramEntryAndAdvancePosition(&readingPos);
+            const int entryPos = readingPos - bigrams->getBigramEntrySize();
+            more = entry.hasNext();
+            if (entry.isValid()) {
+                int probability;
+                if (entry.hasHistoricalInfo()) {
+                    probability = ForgettingCurveUtils::decodeProbability(
+                            entry.getHistoricalInfo(), mBuffers->getHeaderPolicy());
+                } else {
+                    probability = entry.getProbability();
+                }
+                candidates.push(DictProbability(entryPos, probability,
+                        entry.getHistoricalInfo()->getTimeStamp()));
+            }
+        } while (more);
+    }
+    // An entry is popped only after its invalidated form has been written back.
+    while (static_cast<int>(candidates.size()) > maxBigramCount) {
+        const int entryPos = candidates.top().getDictPos();
+        const BigramEntry invalidated = bigrams->getBigramEntry(entryPos).getInvalidatedEntry();
+        if (!bigrams->writeBigramEntry(&invalidated, entryPos)) {
+            AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos);
+            return false;
+        }
+        candidates.pop();
+    }
+    return true;
+}
+
+// Remaps a terminal PtNode's id through mTerminalIdMap; size() is unsigned, hence %zu below.
+bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+        ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+    if (!ptNodeParams->isTerminal()) {
+        return true;
+    }
+    const auto it = mTerminalIdMap->find(ptNodeParams->getTerminalId());
+    if (it == mTerminalIdMap->end()) {
+        AKLOGE("terminal Id %d is not in the terminal position map. map size: %zu",
+                ptNodeParams->getTerminalId(), mTerminalIdMap->size());
+        return false;
+    }
+    if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
+        AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
+        return false;
+    }
+    return true;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
new file mode 100644
index 0000000..655e00a
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+#define LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+
+class HeaderPolicy;
+class Ver4DictBuffers;
+class Ver4PatriciaTrieNodeReader;
+class Ver4PatriciaTrieNodeWriter;
+
+// Writes ver4 dictionary buffers to a dictionary directory, with or without running GC first.
+class Ver4PatriciaTrieWritingHelper {
+ public:
+    // |buffers| is held as a raw pointer; nothing in this class releases it.
+    explicit Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
+            : mBuffers(buffers) {}
+
+    // Flushes a header carrying the given counts, plus the current buffers, to dictDirPath.
+    bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
+            const int bigramCount) const;
+
+    // This method cannot be const because the original dictionary buffer will be updated to detect
+    // useless PtNodes during GC.
+    bool writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
+
+    // Traversal listener: onVisitingPtNode() is defined out of line; other events return true.
+    class TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+            : public DynamicPtReadingHelper::TraversingEventListener {
+     public:
+        TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(
+                Ver4PatriciaTrieNodeWriter *const ptNodeWriter,
+                const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap)
+                : mPtNodeWriter(ptNodeWriter), mTerminalIdMap(terminalIdMap) {}
+
+        bool onAscend() { return true; }
+
+        bool onDescend(const int ptNodeArrayPos) { return true; }
+
+        bool onReadingPtNodeArrayTail() { return true; }
+
+        bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+     private:
+        DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds);
+
+        Ver4PatriciaTrieNodeWriter *const mPtNodeWriter;
+        const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap;
+    };
+
+    // For truncateUnigrams() and truncateBigrams(): a dictionary position paired with the
+    // probability and timestamp that DictProbabilityComparator orders by.
+    class DictProbability {
+     public:
+        DictProbability(const int dictPos, const int probability, const int timestamp)
+                : mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {}
+
+        int getDictPos() const { return mDictPos; }
+        int getProbability() const { return mProbability; }
+        int getTimestamp() const { return mTimestamp; }
+
+     private:
+        DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability);
+
+        int mDictPos;
+        int mProbability;
+        int mTimestamp;
+    };
+
+    // For truncateUnigrams() and truncateBigrams(). Used as the std::priority_queue ordering,
+    // so the queue's top() is the entry with the lowest probability; ties go to the larger
+    // timestamp, then to the lowest dictionary position.
+    class DictProbabilityComparator {
+     public:
+        // const so that the comparator can also be invoked through a const object.
+        bool operator()(const DictProbability &left, const DictProbability &right) const {
+            if (left.getProbability() != right.getProbability()) {
+                return left.getProbability() > right.getProbability();
+            }
+            if (left.getTimestamp() != right.getTimestamp()) {
+                return left.getTimestamp() < right.getTimestamp();
+            }
+            return left.getDictPos() > right.getDictPos();
+        }
+
+     private:
+        DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator);
+    };
+
+    bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
+            Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
+            int *const outBigramCount);
+
+    bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+            Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount);
+
+    bool truncateBigrams(const int maxBigramCount);
+
+    Ver4DictBuffers *const mBuffers;
+};
+} // namespace latinime
+
+#endif /* LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp
new file mode 100644
index 0000000..2306636
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Reads the PtNode count stored at ptNodeArrayPos and reports where the first PtNode starts.
+// Returns false, leaving the outputs untouched, for an out-of-range position or negative count.
+bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+        int *const outPtNodeCount, int *const outFirstPtNodePos) const {
+    const bool isPosInRange = ptNodeArrayPos >= 0 && ptNodeArrayPos < mBuffer->getTailPosition();
+    if (!isPosInRange) {
+        // Reading invalid position because of a bug or a broken dictionary.
+        AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
+                ptNodeArrayPos, mBuffer->getTailPosition());
+        ASSERT(false);
+        return false;
+    }
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodeArrayPos);
+    const uint8_t *const dictBuf = mBuffer->getBuffer(inAdditionalBuffer);
+    // dictBuf is indexed from the start of its own buffer, so a position in the additional
+    // buffer is rebased by the original buffer size before the read and restored afterwards.
+    const int rebase = inAdditionalBuffer ? mBuffer->getOriginalBufferSize() : 0;
+    int cursor = ptNodeArrayPos - rebase;
+    const int nodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+            dictBuf, &cursor);
+    if (nodeCount < 0) {
+        AKLOGE("Invalid PtNode count in an array: %d.", nodeCount);
+        return false;
+    }
+    *outPtNodeCount = nodeCount;
+    *outFirstPtNodePos = cursor + rebase;
+    return true;
+}
+
+// Resolves the forward link stored at forwordLinkPos into the next PtNode array position.
+// *outNextPtNodeArrayPos becomes NOT_A_DICT_POS when the stored link is not a valid one.
+// Returns false only when forwordLinkPos itself is outside the buffer.
+bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+        int *const outNextPtNodeArrayPos) const {
+    const bool isPosInRange = forwordLinkPos >= 0 && forwordLinkPos < mBuffer->getTailPosition();
+    if (!isPosInRange) {
+        // Reading invalid position because of bug or broken dictionary.
+        AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
+                forwordLinkPos, mBuffer->getTailPosition());
+        ASSERT(false);
+        return false;
+    }
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(forwordLinkPos);
+    const uint8_t *const dictBuf = mBuffer->getBuffer(inAdditionalBuffer);
+    // A position in the additional buffer is rebased so that it indexes into dictBuf.
+    const int posInBuf = inAdditionalBuffer ?
+            forwordLinkPos - mBuffer->getOriginalBufferSize() : forwordLinkPos;
+    const int linkOffset = DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, posInBuf);
+    // The stored value is applied as an offset from the link's own (un-rebased) position.
+    *outNextPtNodeArrayPos = DynamicPtReadingUtils::isValidForwardLinkPosition(linkOffset) ?
+            forwordLinkPos + linkOffset : NOT_A_DICT_POS;
+    return true;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h
new file mode 100644
index 0000000..919db43
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PT_NODE_ARRAY_READER_H
+#define LATINIME_VER4_PT_NODE_ARRAY_READER_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+// PtNodeArrayReader that reads from a BufferWithExtendableBuffer held as a raw pointer.
+class Ver4PtNodeArrayReader : public PtNodeArrayReader {
+ public:
+    explicit Ver4PtNodeArrayReader(const BufferWithExtendableBuffer *const buffer)
+            : mBuffer(buffer) {}
+    virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+            int *const outPtNodeCount, int *const outFirstPtNodePos) const;
+    virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+            int *const outNextPtNodeArrayPos) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver4PtNodeArrayReader);
+    const BufferWithExtendableBuffer *const mBuffer;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PT_NODE_ARRAY_READER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
new file mode 100644
index 0000000..3ab4ba8
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;  // 1 MiB
+const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
+// 128 KiB step. TODO: Needs to allocate larger memory corresponding to the current vector size.
+const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
+
+uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) const {
+    const bool inAdditional = isInAdditionalBuffer(pos);  // decides which buffer is read below
+    const int localPos = inAdditional ? pos - mOriginalBuffer.size() : pos;  // index in that buffer
+    return ByteArrayUtils::readUint(getBuffer(inAdditional), size, localPos);
+}
+
+uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
+        int *const pos) const {
+    const uint32_t value = readUint(size, *pos);  // kept unsigned: no round trip through int
+    *pos += size;  // the caller's position moves past the bytes just read
+    return value;
+}
+
+// Reads code points at |*pos| through ByteArrayUtils::readStringAndAdvancePosition(), which
+// is handed |pos| to update, and stores the returned count in |*outCodePointCount|.
+void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxCodePointCount,
+        int *const outCodePoints, int *outCodePointCount, int *const pos) const {
+    const bool inAdditional = isInAdditionalBuffer(*pos);
+    // |*pos| is rebased around the read when it refers to the additional buffer.
+    const int rebase = inAdditional ? static_cast<int>(mOriginalBuffer.size()) : 0;
+    *pos -= rebase;
+    *outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
+            getBuffer(inAdditional), maxCodePointCount, outCodePoints, pos);
+    *pos += rebase;
+}
+
+bool BufferWithExtendableBuffer::extend(const int size) {
+ return checkAndPrepareWriting(getTailPosition(), size);  // i.e. prepare |size| bytes at the tail
+}
+
+bool BufferWithExtendableBuffer::writeUint(const uint32_t data, const int size, const int pos) {
+ int writingPos = pos;  // scratch copy: the advancing variant modifies the position it is given
+ return writeUintAndAdvancePosition(data, size, &writingPos);
+}
+
+// Writes |data| as a |size|-byte value (size must be 1..4) at |*pos| after the position has
+// been validated by checkAndPrepareWriting(). Returns false without writing on any failure.
+bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
+        int *const pos) {
+    if (size < 1 || size > 4) {
+        AKLOGI("writeUintAndAdvancePosition() is called with invalid size: %d", size);
+        ASSERT(false);
+        return false;
+    }
+    if (!checkAndPrepareWriting(*pos, size)) {
+        return false;
+    }
+    // For the additional buffer, |*pos| is rebased around the write and restored afterwards.
+    if (isInAdditionalBuffer(*pos)) {
+        *pos -= mOriginalBuffer.size();
+        ByteArrayUtils::writeUintAndAdvancePosition(mAdditionalBuffer.data(), data, size, pos);
+        *pos += mOriginalBuffer.size();
+    } else {
+        ByteArrayUtils::writeUintAndAdvancePosition(mOriginalBuffer.data(), data, size, pos);
+    }
+    return true;
+}
+
+// Writes |codePoints| at |*pos| once checkAndPrepareWriting() accepts the required byte count.
+// For the additional buffer, |*pos| is rebased around the write and restored afterwards.
+bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints,
+        const int codePointCount, const bool writesTerminator, int *const pos) {
+    const size_t requiredBytes = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints(
+            codePoints, codePointCount, writesTerminator);
+    if (!checkAndPrepareWriting(*pos, requiredBytes)) {
+        return false;
+    }
+    if (isInAdditionalBuffer(*pos)) {
+        *pos -= mOriginalBuffer.size();
+        ByteArrayUtils::writeCodePointsAndAdvancePosition(mAdditionalBuffer.data(), codePoints,
+                codePointCount, writesTerminator, pos);
+        *pos += mOriginalBuffer.size();
+    } else {
+        ByteArrayUtils::writeCodePointsAndAdvancePosition(mOriginalBuffer.data(), codePoints,
+                codePointCount, writesTerminator, pos);
+    }
+    return true;
+}
+
+// Grows the additional buffer by max(step, size), capped at mMaxAdditionalBufferSize.
+bool BufferWithExtendableBuffer::extendBuffer(const size_t size) {
+    const size_t step = std::max(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP, size);
+    const size_t grownSize = std::min(mAdditionalBuffer.size() + step, mMaxAdditionalBufferSize);
+    const bool fits = grownSize >= mAdditionalBuffer.size() + size;  // false: cap leaves no room
+    if (fits) {
+        mAdditionalBuffer.resize(grownSize);
+    }
+    return fits;
+}
+
+// Validates a write of |size| bytes at |pos| and, when the write starts exactly at the tail,
+// grows the additional buffer and the used size to cover it. Returns false if it cannot fit.
+bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) {
+    if (pos < 0 || size < 0) {
+        // Invalid position or size.
+        return false;
+    }
+    // Both operands are non-negative here; adding them as size_t avoids the signed overflow
+    // that |pos + size| would hit for values near INT_MAX.
+    const size_t totalRequiredSize = static_cast<size_t>(pos) + static_cast<size_t>(size);
+    if (!isInAdditionalBuffer(pos)) {
+        // Here don't need to care about the additional buffer; the write must stay inside
+        // the original buffer.
+        return mOriginalBuffer.size() >= totalRequiredSize;
+    }
+    // Hereafter, pos is in the additional buffer.
+    const size_t tailPosition = static_cast<size_t>(getTailPosition());
+    if (totalRequiredSize <= tailPosition) {
+        // The buffer has sufficient capacity.
+        return true;
+    }
+    if (static_cast<size_t>(pos) != tailPosition) {
+        // The additional buffer must be extended from the tail position.
+        return false;
+    }
+    const size_t extendSize = totalRequiredSize -
+            std::min(mAdditionalBuffer.size() + mOriginalBuffer.size(), totalRequiredSize);
+    if (extendSize > 0 && !extendBuffer(extendSize)) {
+        // Failed to extend the buffer.
+        return false;
+    }
+    mUsedAdditionalBufferSize += size;
+    return true;
+}
+
+// Copies sourceBuffer's contents into this buffer at the same positions, at most
+// sizeof(uint32_t) bytes per step. Returns false as soon as one write is rejected.
+bool BufferWithExtendableBuffer::copy(const BufferWithExtendableBuffer *const sourceBuffer) {
+    const int chunkLimit = sizeof(uint32_t);
+    const int endPos = sourceBuffer->getTailPosition();
+    int chunkSize = 0;
+    for (int cursor = 0; cursor < endPos; cursor += chunkSize) {
+        // The final chunk may be shorter than chunkLimit.
+        chunkSize = std::min(endPos - cursor, chunkLimit);
+        const uint32_t chunk = sourceBuffer->readUint(chunkSize, cursor);
+        if (!writeUint(chunk, chunkSize, cursor)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+}
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
new file mode 100644
index 0000000..0744052
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H
+#define LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+
+namespace latinime {
+
+// This is used as a buffer that can be extended for updatable dictionaries.
+// To optimize performance, raw pointer is directly used for reading buffer. The position has to be
+// adjusted to access additional buffer. On the other hand, this class does not provide writable
+// raw pointer but provides several methods that handle boundary checking for writing data.
+class BufferWithExtendableBuffer {
+ public:
+    static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE;
+
+    BufferWithExtendableBuffer(const ReadWriteByteArrayView originalBuffer,
+            const int maxAdditionalBufferSize)
+            : mOriginalBuffer(originalBuffer), mAdditionalBuffer(), mUsedAdditionalBufferSize(0),
+              mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+
+    // Without original buffer; mOriginalBuffer is default-constructed.
+    BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
+            : mOriginalBuffer(), mAdditionalBuffer(), mUsedAdditionalBufferSize(0),
+              mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+
+    AK_FORCE_INLINE int getTailPosition() const {
+        return mOriginalBuffer.size() + mUsedAdditionalBufferSize;  // One past the used data.
+    }
+
+    AK_FORCE_INLINE int getUsedAdditionalBufferSize() const {
+        return mUsedAdditionalBufferSize;
+    }
+
+    /**
+     * For reading. A position at or past the original buffer's size is in the additional buffer.
+     */
+    AK_FORCE_INLINE bool isInAdditionalBuffer(const int position) const {
+        return position >= static_cast<int>(mOriginalBuffer.size());
+    }
+
+    // TODO: Resolve the issue that the address can be changed when the vector is resized.
+    // CAVEAT!: Be careful about array out of bound access with buffers
+    AK_FORCE_INLINE const uint8_t *getBuffer(const bool usesAdditionalBuffer) const {
+        if (usesAdditionalBuffer) {
+            return mAdditionalBuffer.data();
+        } else {
+            return mOriginalBuffer.data();
+        }
+    }
+
+    uint32_t readUint(const int size, const int pos) const;
+
+    uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
+
+    void readCodePointsAndAdvancePosition(const int maxCodePointCount,
+            int *const outCodePoints, int *outCodePointCount, int *const pos) const;
+
+    AK_FORCE_INLINE int getOriginalBufferSize() const {
+        return mOriginalBuffer.size();
+    }
+
+    AK_FORCE_INLINE bool isNearSizeLimit() const {
+        return mAdditionalBuffer.size() >= ((mMaxAdditionalBufferSize
+                * NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE) / 100);
+    }
+
+    bool extend(const int size);
+
+    /**
+     * For writing.
+     *
+     * Writing is allowed for original buffer, already written region of additional buffer and the
+     * tail of additional buffer.
+     */
+    bool writeUint(const uint32_t data, const int size, const int pos);
+
+    bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos);
+
+    bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount,
+            const bool writesTerminator, int *const pos);
+
+    bool copy(const BufferWithExtendableBuffer *const sourceBuffer);
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
+
+    static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
+    static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
+
+    const ReadWriteByteArrayView mOriginalBuffer;  // Region handed in at construction.
+    std::vector<uint8_t> mAdditionalBuffer;  // Growable region addressed after the original one.
+    int mUsedAdditionalBufferSize;  // Bytes of mAdditionalBuffer counted as written.
+    const size_t mMaxAdditionalBufferSize;  // Limit that isNearSizeLimit() compares against.
+
+    // Returns whether the buffer was successfully extended.
+    bool extendBuffer(const size_t size);
+
+    // Returns whether it is possible to write size bytes from pos. When pos is at the tail
+    // position of the additional buffer, tries extending the buffer.
+    bool checkAndPrepareWriting(const int pos, const int size);
+};
+}
+#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp
new file mode 100644
index 0000000..0e14f2c
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+const uint8_t ByteArrayUtils::MINIMUM_ONE_BYTE_CHARACTER_VALUE = 0x20;  // Smallest 1-byte value.
+const uint8_t ByteArrayUtils::MAXIMUM_ONE_BYTE_CHARACTER_VALUE = 0xFF;  // Largest 1-byte value.
+const uint8_t ByteArrayUtils::CHARACTER_ARRAY_TERMINATOR = 0x1F;  // Ends a code point array.
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h
new file mode 100644
index 0000000..72ed66c
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BYTE_ARRAY_UTILS_H
+#define LATINIME_BYTE_ARRAY_UTILS_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+/**
+ * Utility methods for reading byte arrays.
+ */
+class ByteArrayUtils {
+ public:
+    /**
+     * Integer writing
+     *
+     * Each method writes an integer of the corresponding size in big-endian byte order.
+     */
+    static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t *const buffer,
+            const uint32_t data, const int size, int *const pos) {
+        // size must be in 1 to 4; any other size falls through without writing.
+        ASSERT(size >= 1 && size <= 4);
+        switch (size) {
+            case 1:
+                ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos);
+                return;
+            case 2:
+                ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos);
+                return;
+            case 3:
+                ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos);
+                return;
+            case 4:
+                ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos);
+                return;
+            default:
+                break;
+        }
+    }
+
+    /**
+     * Integer reading: each method reads a big-endian integer of the corresponding size.
+     *
+     * readUint32 widens the top byte before shifting so that it never lands in the sign bit of
+     * a promoted int.
+     */
+    static AK_FORCE_INLINE uint32_t readUint32(const uint8_t *const buffer, const int pos) {
+        return (static_cast<uint32_t>(buffer[pos]) << 24) ^ (buffer[pos + 1] << 16)
+                ^ (buffer[pos + 2] << 8) ^ buffer[pos + 3];
+    }
+
+    static AK_FORCE_INLINE uint32_t readUint24(const uint8_t *const buffer, const int pos) {
+        return (buffer[pos] << 16) ^ (buffer[pos + 1] << 8) ^ buffer[pos + 2];
+    }
+
+    static AK_FORCE_INLINE uint16_t readUint16(const uint8_t *const buffer, const int pos) {
+        return (buffer[pos] << 8) ^ buffer[pos + 1];
+    }
+
+    static AK_FORCE_INLINE uint8_t readUint8(const uint8_t *const buffer, const int pos) {
+        return buffer[pos];
+    }
+
+    static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        const uint32_t value = readUint32(buffer, *pos);
+        *pos += 4;
+        return value;
+    }
+
+    static AK_FORCE_INLINE int readSint24AndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        const uint8_t value = readUint8(buffer, *pos);
+        if (value < 0x80) {
+            return readUint24AndAdvancePosition(buffer, pos);
+        } else {
+            (*pos)++;
+            return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos));
+        }
+    }
+
+    static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        const uint32_t value = readUint24(buffer, *pos);
+        *pos += 3;
+        return value;
+    }
+
+    static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        const uint16_t value = readUint16(buffer, *pos);
+        *pos += 2;
+        return value;
+    }
+
+    static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        return buffer[(*pos)++];
+    }
+
+    static AK_FORCE_INLINE int readUint(const uint8_t *const buffer,
+            const int size, const int pos) {
+        // size must be in 1 to 4; any other size yields 0.
+        ASSERT(size >= 1 && size <= 4);
+        switch (size) {
+            case 1:
+                return ByteArrayUtils::readUint8(buffer, pos);
+            case 2:
+                return ByteArrayUtils::readUint16(buffer, pos);
+            case 3:
+                return ByteArrayUtils::readUint24(buffer, pos);
+            case 4:
+                return ByteArrayUtils::readUint32(buffer, pos);
+            default:
+                return 0;
+        }
+    }
+
+    /**
+     * Code Point Reading
+     *
+     * 1 byte = bbbbbbbb match
+     * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
+     * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
+     *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
+     *       00011111 would be outside unicode.
+     * else: iso-latin-1 code
+     * This allows for the whole unicode range to be encoded, including chars outside of
+     * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
+     * characters which should never happen anyway (and still work, but take 3 bytes).
+     */
+    static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const int pos) {
+        int p = pos;
+        return readCodePointAndAdvancePosition(buffer, &p);
+    }
+
+    static AK_FORCE_INLINE int readCodePointAndAdvancePosition(
+            const uint8_t *const buffer, int *const pos) {
+        const uint8_t firstByte = readUint8(buffer, *pos);
+        if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) {
+            if (firstByte == CHARACTER_ARRAY_TERMINATOR) {
+                *pos += 1;
+                return NOT_A_CODE_POINT;
+            } else {
+                return readUint24AndAdvancePosition(buffer, pos);
+            }
+        } else {
+            *pos += 1;
+            return firstByte;
+        }
+    }
+
+    /**
+     * String (array of code points) Reading
+     *
+     * Reads code points until the terminator is found or the maxLength limit is reached.
+     */
+    // Returns the length of the string.
+    static int readStringAndAdvancePosition(const uint8_t *const buffer,
+            const int maxLength, int *const outBuffer, int *const pos) {
+        int length = 0;
+        int codePoint = readCodePointAndAdvancePosition(buffer, pos);
+        while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
+            outBuffer[length++] = codePoint;
+            codePoint = readCodePointAndAdvancePosition(buffer, pos);
+        }
+        return length;
+    }
+
+    // Advances the position and returns the length of the string.
+    static int advancePositionToBehindString(
+            const uint8_t *const buffer, const int maxLength, int *const pos) {
+        int length = 0;
+        int codePoint = readCodePointAndAdvancePosition(buffer, pos);
+        while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
+            codePoint = readCodePointAndAdvancePosition(buffer, pos);
+            length++;
+        }
+        return length;
+    }
+
+    /**
+     * String (array of code points) Writing
+     */
+    static void writeCodePointsAndAdvancePosition(uint8_t *const buffer,
+            const int *const codePoints, const int codePointCount, const bool writesTerminator,
+            int *const pos) {
+        for (int i = 0; i < codePointCount; ++i) {
+            const int codePoint = codePoints[i];
+            if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
+                break;
+            } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
+                    || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
+                // three bytes character.
+                writeUint24AndAdvancePosition(buffer, codePoint, pos);
+            } else {
+                // one byte character.
+                writeUint8AndAdvancePosition(buffer, codePoint, pos);
+            }
+        }
+        if (writesTerminator) {
+            writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos);
+        }
+    }
+
+    static int calculateRequiredByteCountToStoreCodePoints(const int *const codePoints,
+            const int codePointCount, const bool writesTerminator) {
+        int byteCount = 0;
+        for (int i = 0; i < codePointCount; ++i) {
+            const int codePoint = codePoints[i];
+            if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
+                break;
+            } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
+                    || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
+                // three bytes character.
+                byteCount += 3;
+            } else {
+                // one byte character.
+                byteCount += 1;
+            }
+        }
+        if (writesTerminator) {
+            // The terminator is one byte.
+            byteCount += 1;
+        }
+        return byteCount;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils);
+
+    static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE;
+    static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE;
+    static const uint8_t CHARACTER_ARRAY_TERMINATOR;
+
+    static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buffer,
+            const uint32_t data, int *const pos) {
+        buffer[(*pos)++] = (data >> 24) & 0xFF;
+        buffer[(*pos)++] = (data >> 16) & 0xFF;
+        buffer[(*pos)++] = (data >> 8) & 0xFF;
+        buffer[(*pos)++] = data & 0xFF;
+    }
+
+    static AK_FORCE_INLINE void writeUint24AndAdvancePosition(uint8_t *const buffer,
+            const uint32_t data, int *const pos) {
+        buffer[(*pos)++] = (data >> 16) & 0xFF;
+        buffer[(*pos)++] = (data >> 8) & 0xFF;
+        buffer[(*pos)++] = data & 0xFF;
+    }
+
+    static AK_FORCE_INLINE void writeUint16AndAdvancePosition(uint8_t *const buffer,
+            const uint16_t data, int *const pos) {
+        buffer[(*pos)++] = (data >> 8) & 0xFF;
+        buffer[(*pos)++] = data & 0xFF;
+    }
+
+    static AK_FORCE_INLINE void writeUint8AndAdvancePosition(uint8_t *const buffer,
+            const uint8_t data, int *const pos) {
+        buffer[(*pos)++] = data & 0xFF;
+    }
+};
+} // namespace latinime
+#endif /* LATINIME_BYTE_ARRAY_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
new file mode 100644
index 0000000..a7e6f6e
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+
+#include <cstdio>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "third_party/android_prediction/utils/time_keeper.h"
+
+namespace latinime {
+
+const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = ".tmp";
+// Byte width of the size field that writeBufferToFileTail() puts in front of a buffer.
+const int DictFileWritingUtils::SIZE_OF_BUFFER_SIZE_FIELD = 4;
+
+/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath,
+        const int dictVersion, const std::vector<int> localeAsCodePointVector,
+        const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
+    TimeKeeper::setCurrentTime();
+    const FormatUtils::FORMAT_VERSION version = FormatUtils::getFormatVersion(dictVersion);
+    if (version == FormatUtils::VERSION_4) {
+        // VERSION_4 files are built from the structures in namespace backward::v402.
+        return createEmptyV4DictFile<backward::v402::Ver4DictConstants,
+                backward::v402::Ver4DictBuffers,
+                backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr>(
+                        filePath, localeAsCodePointVector, attributeMap, version);
+    }
+    if (version == FormatUtils::VERSION_4_ONLY_FOR_TESTING
+            || version == FormatUtils::VERSION_4_DEV) {
+        return createEmptyV4DictFile<Ver4DictConstants, Ver4DictBuffers,
+                Ver4DictBuffers::Ver4DictBuffersPtr>(
+                        filePath, localeAsCodePointVector, attributeMap, version);
+    }
+    AKLOGE("Cannot create dictionary %s because format version %d is not supported.",
+            filePath, dictVersion);
+    return false;
+}
+
+template<class DictConstants, class DictBuffers, class DictBuffersPtr>
+/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
+        const std::vector<int> localeAsCodePointVector,
+        const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
+        const FormatUtils::FORMAT_VERSION formatVersion) {
+    // Write a header with zero counts, then an empty trie, and flush the buffers to dirPath.
+    HeaderPolicy policy(formatVersion, localeAsCodePointVector, attributeMap);
+    DictBuffersPtr buffers = DictBuffers::createVer4DictBuffers(&policy,
+            DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
+    policy.fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */, 0 /* unigramCount */,
+            0 /* bigramCount */, 0 /* extendedRegionSize */, buffers->getWritableHeaderBuffer());
+    if (DynamicPtWritingUtils::writeEmptyDictionary(
+            buffers->getWritableTrieBuffer(), 0 /* rootPos */)) {
+        return buffers->flush(dirPath);
+    }
+    AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
+    return false;
+}
+
+/* static */ bool DictFileWritingUtils::flushBufferToFileWithSuffix(const char *const basePath,
+        const char *const suffix, const BufferWithExtendableBuffer *const buffer) {
+    const int pathBufSize = FileUtils::getFilePathWithSuffixBufSize(basePath, suffix);
+    char pathWithSuffix[pathBufSize];  // Receives basePath immediately followed by suffix.
+    FileUtils::getFilePathWithSuffix(basePath, suffix, pathBufSize, pathWithSuffix);
+    return flushBufferToFile(pathWithSuffix, buffer);
+}
+
+/* static */ bool DictFileWritingUtils::writeBufferToFileTail(FILE *const file,
+        const BufferWithExtendableBuffer *const buffer) {
+    // Emit the buffer's tail position as a big-endian size field, then the buffer contents.
+    uint8_t sizeField[SIZE_OF_BUFFER_SIZE_FIELD];
+    int fieldPos = 0;
+    ByteArrayUtils::writeUintAndAdvancePosition(sizeField, buffer->getTailPosition(),
+            SIZE_OF_BUFFER_SIZE_FIELD, &fieldPos);
+    const bool sizeFieldWritten =
+            fwrite(sizeField, SIZE_OF_BUFFER_SIZE_FIELD, 1 /* count */, file) >= 1;
+    return sizeFieldWritten && writeBufferToFile(file, buffer);
+}
+
+/* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath,
+        const BufferWithExtendableBuffer *const buffer) {
+    const int fd = open(filePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+    if (fd == -1) {
+        AKLOGE("File %s cannot be opened. errno: %d", filePath, errno);
+        ASSERT(false);
+        return false;
+    }
+    FILE *const file = fdopen(fd, "wb");
+    if (!file) {
+        AKLOGE("fdopen failed for the file %s. errno: %d", filePath, errno);
+        ASSERT(false);
+        return false;  // NOTE(review): fd and the just-created file are left behind here.
+    }
+    const bool written = writeBufferToFile(file, buffer);
+    // fclose() flushes stdio's buffer, so a failure there is treated like a failed write.
+    if ((fclose(file) != 0) || !written) {
+        remove(filePath);
+        AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath,
+                buffer->getTailPosition());
+        ASSERT(false);
+        return false;
+    }
+    return true;
+}
+
+// Writes the original buffer and then the used additional buffer. Returns false on failure.
+/* static */ bool DictFileWritingUtils::writeBufferToFile(FILE *const file,
+        const BufferWithExtendableBuffer *const buffer) {
+    const int originalSize = buffer->getOriginalBufferSize();
+    const bool originalWritten = originalSize <= 0 || fwrite(
+            buffer->getBuffer(false /* usesAdditionalBuffer */), originalSize, 1, file) >= 1;
+    if (!originalWritten) {
+        return false;
+    }
+    // An empty region is skipped rather than passed to fwrite().
+    const int additionalSize = buffer->getUsedAdditionalBufferSize();
+    const bool additionalWritten = additionalSize <= 0 || fwrite(
+            buffer->getBuffer(true /* usesAdditionalBuffer */), additionalSize, 1, file) >= 1;
+    return additionalWritten;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
new file mode 100644
index 0000000..83573a5
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_FILE_WRITING_UTILS_H
+#define LATINIME_DICT_FILE_WRITING_UTILS_H
+
+#include <cstdio>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class DictFileWritingUtils {
+ public:
+    static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
+
+    // Creates an empty dictionary in the format selected by dictVersion; false if unsupported.
+    static bool createEmptyDictFile(const char *const filePath, const int dictVersion,
+            const std::vector<int> localeAsCodePointVector,
+            const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
+
+    // Writes buffer to a newly created file whose path is basePath followed by suffix.
+    static bool flushBufferToFileWithSuffix(const char *const basePath, const char *const suffix,
+            const BufferWithExtendableBuffer *const buffer);
+
+    // Writes the size of buffer as a fixed-width field, followed by its contents, to file.
+    static bool writeBufferToFileTail(FILE *const file,
+            const BufferWithExtendableBuffer *const buffer);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
+
+    static const int SIZE_OF_BUFFER_SIZE_FIELD;
+
+    // Builds an empty ver4 dictionary in memory and flushes it to dirPath.
+    template<class DictConstants, class DictBuffers, class DictBuffersPtr>
+    static bool createEmptyV4DictFile(const char *const dirPath,
+            const std::vector<int> localeAsCodePointVector,
+            const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
+            const FormatUtils::FORMAT_VERSION formatVersion);
+
+    // Creates filePath, which must not exist yet, and writes buffer to it.
+    static bool flushBufferToFile(const char *const filePath,
+            const BufferWithExtendableBuffer *const buffer);
+
+    static bool writeBufferToFile(FILE *const file,
+            const BufferWithExtendableBuffer *const buffer);
+};
+} // namespace latinime
+#endif /* LATINIME_DICT_FILE_WRITING_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.cpp
new file mode 100644
index 0000000..9622ec6
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.cpp
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h"
+
+#include <cstdio>
+#include <cstring>
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+namespace latinime {
+
+// Returns the file size in bytes, or -1 if the file cannot be opened or its status read.
+/* static */ int FileUtils::getFileSize(const char *const filePath) {
+    const int fd = open(filePath, O_RDONLY);
+    if (fd == -1) {
+        return -1;
+    }
+    struct stat fileStat;
+    const bool statSucceeded = (fstat(fd, &fileStat) == 0);
+    close(fd);
+    if (!statSucceeded) {
+        return -1;
+    }
+    return static_cast<int>(fileStat.st_size);  // Narrowed to int.
+}
+
+/* static */ bool FileUtils::existsDir(const char *const dirPath) {
+    DIR *const handle = opendir(dirPath);  // The directory exists iff it can be opened.
+    if (handle != NULL) {
+        closedir(handle);
+        return true;
+    }
+    return false;
+}
+
+// Removes dirPath and the files directly inside it via the retrying overload (maxTries = 5).
+/* static */ bool FileUtils::removeDirAndFiles(const char *const dirPath) {
+    return removeDirAndFiles(dirPath, 5 /* maxTries */);
+}
+
+// Removes the files in a directory and then the directory, retrying up to maxTries times.
+/* static */ bool FileUtils::removeDirAndFiles(const char *const dirPath, const int maxTries) {
+    DIR *const dir = opendir(dirPath);
+    if (dir == NULL) {
+        AKLOGE("Cannot open dir %s.", dirPath);
+        return true;  // A directory that cannot be opened is reported as success.
+    }
+    struct dirent *dirent;
+    while ((dirent = readdir(dir)) != NULL) {
+        if (dirent->d_type == DT_DIR) {
+            continue;  // Sub-directories are never removed.
+        }
+        if (strcmp(dirent->d_name, ".") == 0 || strcmp(dirent->d_name, "..") == 0) {
+            continue;
+        }
+        const int filePathBufSize = getFilePathBufSize(dirPath, dirent->d_name);
+        char filePath[filePathBufSize];
+        getFilePath(dirPath, dirent->d_name, filePathBufSize, filePath);
+        if (remove(filePath) != 0) {
+            AKLOGE("Cannot remove file %s.", filePath);
+            closedir(dir);
+            return false;
+        }
+    }
+    closedir(dir);
+    if (remove(dirPath) != 0) {
+        if (maxTries > 0) {
+            // On NFS, deleting files sometimes creates new files. I'm not sure what the
+            // correct way of dealing with this is, but for the time being, this seems to work.
+            return removeDirAndFiles(dirPath, maxTries - 1);  // Report the retry's outcome.
+        } else {
+            AKLOGE("Cannot remove directory %s.", dirPath);
+            return false;
+        }
+    }
+    return true;
+}
+
+/* static */ int FileUtils::getFilePathWithSuffixBufSize(const char *const filePath,
+        const char *const suffix) {
+    return strlen(filePath) + strlen(suffix) + 1 /* terminator */;  // Path, suffix, and NUL.
+}
+
+/* static */ void FileUtils::getFilePathWithSuffix(const char *const filePath,
+        const char *const suffix, const int filePathBufSize, char *const outFilePath) {
+    snprintf(outFilePath, filePathBufSize, "%s%s", filePath, suffix);  // Bounded by buf size.
+}
+
+/* static */ int FileUtils::getFilePathBufSize(const char *const dirPath,
+        const char *const fileName) {
+    return strlen(dirPath) + 1 /* '/' */ + strlen(fileName) + 1 /* terminator */;  // dir/file
+}
+
+/* static */ void FileUtils::getFilePath(const char *const dirPath, const char *const fileName,
+        const int filePathBufSize, char *const outFilePath) {
+    snprintf(outFilePath, filePathBufSize, "%s/%s", dirPath, fileName);  // Bounded by buf size.
+}
+
+/* static */ bool FileUtils::getFilePathWithoutSuffix(const char *const filePath,
+        const char *const suffix, const int outDirPathBufSize, char *const outDirPath) {
+    const int pathLen = strlen(filePath);
+    const int suffixLen = strlen(suffix);
+    if (pathLen <= suffixLen) {
+        AKLOGE("File path length (%s:%d) is shorter that suffix length (%s:%d).",
+                filePath, pathLen, suffix, suffixLen);
+        return false;
+    }
+    const int stemLen = pathLen - suffixLen;
+    if (outDirPathBufSize <= stemLen) {
+        AKLOGE("outDirPathBufSize is too small. filePath: %s, suffix: %s, outDirPathBufSize: %d",
+                filePath, suffix, outDirPathBufSize);
+        return false;
+    }
+    if (strcmp(filePath + stemLen, suffix) != 0) {
+        AKLOGE("File Path %s does not have %s as a suffix", filePath, suffix);
+        return false;
+    }
+    snprintf(outDirPath, stemLen + 1 /* terminator */, "%s", filePath);  // Copies the stem only.
+    return true;
+}
+
+/* static */ void FileUtils::getDirPath(const char *const filePath, const int outDirPathBufSize,
+        char *const outDirPath) {
+    const char *const lastSlash = strrchr(filePath, '/');
+    if (lastSlash == NULL) {
+        return;  // Without any '/', outDirPath is left unmodified.
+    }
+    const int dirPathLength = static_cast<int>(lastSlash - filePath);
+    if (dirPathLength >= outDirPathBufSize) {
+        AKLOGE("outDirPathBufSize is too small. filePath: %s, outDirPathBufSize: %d",
+                filePath, outDirPathBufSize);
+        ASSERT(false);
+        return;
+    }
+    snprintf(outDirPath, dirPathLength + 1 /* terminator */, "%s", filePath);
+}
+
+/* static */ void FileUtils::getBasename(const char *const filePath,
+        const int outNameBufSize, char *const outName) {
+    const int copySize = strlen(filePath) + 1 /* terminator */;
+    char pathCopy[copySize];  // basename() receives this scratch copy, not filePath itself.
+    snprintf(pathCopy, copySize, "%s", filePath);
+    const char *const name = basename(pathCopy);
+    const int nameLength = strlen(name);
+    if (nameLength < outNameBufSize) {
+        snprintf(outName, nameLength + 1 /* terminator */, "%s", name);
+        return;
+    }
+    AKLOGE("outNameBufSize is too small. filePath: %s, outNameBufSize: %d",
+            filePath, outNameBufSize);
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h
new file mode 100644
index 0000000..38c9d91
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_FILE_UTILS_H
+#define LATINIME_FILE_UTILS_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class FileUtils {
+ public:
+ // Returns -1 on error.
+ static int getFileSize(const char *const filePath);
+
+ static bool existsDir(const char *const dirPath);
+
+ // Remove a directory and all files in the directory.
+ static bool removeDirAndFiles(const char *const dirPath);
+
+ static int getFilePathWithSuffixBufSize(const char *const filePath, const char *const suffix);
+
+ static void getFilePathWithSuffix(const char *const filePath, const char *const suffix,
+ const int filePathBufSize, char *const outFilePath);
+
+ static int getFilePathBufSize(const char *const dirPath, const char *const fileName);
+
+ static void getFilePath(const char *const dirPath, const char *const fileName,
+ const int filePathBufSize, char *const outFilePath);
+
+ // Returns whether filePath has the suffix; also false when outDirPath is too small.
+ static bool getFilePathWithoutSuffix(const char *const filePath, const char *const suffix,
+ const int dirPathBufSize, char *const outDirPath);
+
+ static void getDirPath(const char *const filePath, const int dirPathBufSize,
+ char *const outDirPath);  // outDirPath is untouched if filePath has no '/'.
+
+ static void getBasename(const char *const filePath, const int outNameBufSize,
+ char *const outName);  // outName is untouched if the buffer is too small.
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);
+
+ static bool removeDirAndFiles(const char *const dirPath, const int maxTries);
+};
+} // namespace latinime
+#endif /* LATINIME_FILE_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
new file mode 100644
index 0000000..031f936
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+#include <algorithm>
+#include <cmath>
+#include <stdlib.h>
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/probability_utils.h"
+#include "third_party/android_prediction/utils/time_keeper.h"
+
+namespace latinime {
+
+const int ForgettingCurveUtils::MULTIPLIER_TWO_IN_PROBABILITY_SCALE = 8;
+const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;  // 2 hours
+
+const int ForgettingCurveUtils::MAX_LEVEL = 3;
+const int ForgettingCurveUtils::MIN_VISIBLE_LEVEL = 1;  // Level 0 decodes to no probability.
+const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
+const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14;
+
+const float ForgettingCurveUtils::UNIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2;
+const float ForgettingCurveUtils::BIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2;
+
+const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+
+// Merges newHistoricalInfo into originalHistoricalInfo. The result always carries the
+// timestamp of newHistoricalInfo.
+// TODO: Revise the logic to decide the initial probability depending on the given probability.
+/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
+        const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
+        const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy) {
+    const int timestamp = newHistoricalInfo->getTimeStamp();
+    const int originalLevel = originalHistoricalInfo->getLevel();
+    const int originalCount = originalHistoricalInfo->getCount();
+    if (newProbability != NOT_A_PROBABILITY && originalLevel == 0) {
+        // Add entry as a valid word: the level is raised to at least MIN_VISIBLE_LEVEL.
+        const int level = clampToVisibleEntryLevelRange(newHistoricalInfo->getLevel());
+        const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy);
+        return HistoricalInfo(timestamp, level, count);
+    }
+    const bool newInfoOutranksOriginal = originalLevel < newHistoricalInfo->getLevel()
+            || (originalLevel == newHistoricalInfo->getLevel()
+                    && originalCount < newHistoricalInfo->getCount());
+    if (!originalHistoricalInfo->isValid() || newInfoOutranksOriginal) {
+        // Initial information: adopt the new level and count, clamped to their valid ranges.
+        const int level = clampToValidLevelRange(newHistoricalInfo->getLevel());
+        const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy);
+        return HistoricalInfo(timestamp, level, count);
+    }
+    const int updatedCount = originalCount + 1;
+    if (updatedCount < headerPolicy->getForgettingCurveOccurrencesToLevelUp()) {
+        // Not enough occurrences to level up yet: just record one more.
+        return HistoricalInfo(timestamp, originalLevel, updatedCount);
+    }
+    if (originalLevel >= MAX_LEVEL) {
+        // The level is already max: keep level and count, refreshing only the timestamp.
+        return HistoricalInfo(timestamp, originalLevel, originalCount);
+    }
+    // Level up and restart the count.
+    return HistoricalInfo(timestamp, originalLevel + 1, 0 /* count */);
+}
+
+/* static */ int ForgettingCurveUtils::decodeProbability(
+        const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) {
+    const int durationToLevelDown = headerPolicy->getForgettingCurveDurationToLevelDown();
+    const int timeStep = clampToValidTimeStepCountRange(
+            getElapsedTimeStepCount(historicalInfo->getTimeStamp(), durationToLevelDown));
+    const int level = clampToValidLevelRange(historicalInfo->getLevel());
+    const int tableId = headerPolicy->getForgettingCurveProbabilityValuesTableId();
+    return sProbabilityTable.getProbability(tableId, level, timeStep);  // level/timeStep clamped
+}
+
+/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability,
+        const int bigramProbability) {
+    if (unigramProbability == NOT_A_PROBABILITY) {
+        return NOT_A_PROBABILITY;
+    }
+    // Without a bigram probability the backed-off unigram is used; the result is capped.
+    // TODO: Investigate better way to handle bigram probability.
+    const int boostedBigram = bigramProbability + MULTIPLIER_TWO_IN_PROBABILITY_SCALE;
+    const int probability = (bigramProbability == NOT_A_PROBABILITY)
+            ? backoff(unigramProbability) : std::max(unigramProbability, boostedBigram);
+    return std::min(probability, MAX_PROBABILITY);
+}
+
+/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo,
+        const HeaderPolicy *const headerPolicy) {
+    // Entries above level 0 are always kept; level-0 ones only below the discard threshold.
+    return historicalInfo->getLevel() > 0 || getElapsedTimeStepCount(
+            historicalInfo->getTimeStamp(), headerPolicy->getForgettingCurveDurationToLevelDown())
+                    < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
+}
+
+/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
+        const HistoricalInfo *const originalHistoricalInfo,
+        const HeaderPolicy *const headerPolicy) {
+    const int originalTimestamp = originalHistoricalInfo->getTimeStamp();
+    if (originalTimestamp == NOT_A_TIMESTAMP) {
+        return HistoricalInfo();
+    }
+    const int durationToLevelDownInSeconds = headerPolicy->getForgettingCurveDurationToLevelDown();
+    const int elapsedTimeStep =
+            getElapsedTimeStepCount(originalTimestamp, durationToLevelDownInSeconds);
+    if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
+        // No need to update historical info.
+        return *originalHistoricalInfo;
+    }
+    // Level down once per MAX_ELAPSED_TIME_STEP_COUNT + 1 elapsed steps, but not below level 0,
+    // and move the timestamp forward by the time those level downs account for.
+    const int originalLevel = originalHistoricalInfo->getLevel();
+    const int maxLevelDownAmount = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
+    const int levelDownAmount = std::min(maxLevelDownAmount, originalLevel);
+    return HistoricalInfo(originalTimestamp + levelDownAmount * durationToLevelDownInSeconds,
+            originalLevel - levelDownAmount, 0 /* count */);
+}
+
+// Returns whether a decay is needed now: always when the unigram or bigram count has reached
+// its hard limit, otherwise only if mindsBlockByDecay is false and more than
+// DECAY_INTERVAL_SECONDS have passed since the last decay.
+/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
+        const int unigramCount, const int bigramCount, const HeaderPolicy *const headerPolicy) {
+    const int unigramHardLimit = getUnigramCountHardLimit(headerPolicy->getMaxUnigramCount());
+    const int bigramHardLimit = getBigramCountHardLimit(headerPolicy->getMaxBigramCount());
+    if (unigramCount >= unigramHardLimit || bigramCount >= bigramHardLimit) {
+        // Unigram or bigram count exceeds the limit.
+        return true;
+    }
+    if (mindsBlockByDecay) {
+        // The time-based decay below is suppressed.
+        return false;
+    }
+    const auto nextDecayTime = headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS;
+    return nextDecayTime < TimeKeeper::peekCurrentTime();
+}
+
+// Currently a no-op that applies no backoff weight. See comments in ProbabilityUtils::backoff().
+/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
+ // See TODO comments in ForgettingCurveUtils::getProbability().
+ return unigramProbability;
+}
+
+/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp,
+        const int durationToLevelDownInSeconds) {
+    // Treat a step as at least one second so a very short duration cannot divide by zero.
+    const int timeStepDurationInSeconds =
+            std::max(durationToLevelDownInSeconds / (MAX_ELAPSED_TIME_STEP_COUNT + 1), 1);
+    return (TimeKeeper::peekCurrentTime() - timestamp) / timeStepDurationInSeconds;
+}
+
+/* static */ int ForgettingCurveUtils::clampToVisibleEntryLevelRange(const int level) {
+ return std::min(std::max(level, MIN_VISIBLE_LEVEL), MAX_LEVEL);  // Result is never level 0.
+}
+
+/* static */ int ForgettingCurveUtils::clampToValidCountRange(const int count,
+ const HeaderPolicy *const headerPolicy) {  // Upper bound: occurrences-to-level-up - 1.
+ return std::min(std::max(count, 0), headerPolicy->getForgettingCurveOccurrencesToLevelUp() - 1);
+}
+
+/* static */ int ForgettingCurveUtils::clampToValidLevelRange(const int level) {
+ return std::min(std::max(level, 0), MAX_LEVEL);  // Into [0, MAX_LEVEL].
+}
+
+/* static */ int ForgettingCurveUtils::clampToValidTimeStepCountRange(const int timeStepCount) {
+ return std::min(std::max(timeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT);  // Never negative.
+}
+
+const int ForgettingCurveUtils::ProbabilityTable::PROBABILITY_TABLE_COUNT = 4;
+const int ForgettingCurveUtils::ProbabilityTable::WEAK_PROBABILITY_TABLE_ID = 0;
+const int ForgettingCurveUtils::ProbabilityTable::MODEST_PROBABILITY_TABLE_ID = 1;
+const int ForgettingCurveUtils::ProbabilityTable::STRONG_PROBABILITY_TABLE_ID = 2;
+const int ForgettingCurveUtils::ProbabilityTable::AGGRESSIVE_PROBABILITY_TABLE_ID = 3;
+const int ForgettingCurveUtils::ProbabilityTable::WEAK_MAX_PROBABILITY = 127;
+const int ForgettingCurveUtils::ProbabilityTable::MODEST_BASE_PROBABILITY = 32;
+const int ForgettingCurveUtils::ProbabilityTable::STRONG_BASE_PROBABILITY = 35;
+const int ForgettingCurveUtils::ProbabilityTable::AGGRESSIVE_BASE_PROBABILITY = 40;
+
+
+ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTables() {
+    // Precomputes mTables[tableId][level][timeStep]. Within a level the probability decays
+    // geometrically from the level's base value towards the base value of the level below.
+    const int timeStepCountPerLevel = MAX_ELAPSED_TIME_STEP_COUNT + 1;
+    mTables.resize(PROBABILITY_TABLE_COUNT);
+    for (int tableId = 0; tableId < PROBABILITY_TABLE_COUNT; ++tableId) {
+        std::vector<std::vector<int>> &levelRows = mTables[tableId];
+        levelRows.resize(MAX_LEVEL + 1);
+        levelRows[0].resize(timeStepCountPerLevel, NOT_A_PROBABILITY);  // Level 0: no probability.
+        for (int level = 1; level <= MAX_LEVEL; ++level) {
+            levelRows[level].resize(timeStepCountPerLevel);
+            const float initialProbability = getBaseProbabilityForLevel(tableId, level);
+            const float endProbability = getBaseProbabilityForLevel(tableId, level - 1);
+            for (int timeStep = 0; timeStep < timeStepCountPerLevel; ++timeStep) {
+                const float probability = initialProbability
+                        * powf(initialProbability / endProbability,
+                                -1.0f * static_cast<float>(timeStep)
+                                        / static_cast<float>(timeStepCountPerLevel));
+                levelRows[level][timeStep] =
+                        std::min(std::max(static_cast<int>(probability), 1), MAX_PROBABILITY);
+            }
+        }
+    }
+}
+
+/* static */ int ForgettingCurveUtils::ProbabilityTable::getBaseProbabilityForLevel(
+        const int tableId, const int level) {
+    // Max probability (at MAX_LEVEL) is 127, 128, 140 and 160 for the four tables respectively.
+    if (tableId == WEAK_PROBABILITY_TABLE_ID) {
+        return static_cast<float>(WEAK_MAX_PROBABILITY / (1 << (MAX_LEVEL - level)));
+    }
+    int baseProbability;
+    if (tableId == MODEST_PROBABILITY_TABLE_ID) {
+        baseProbability = MODEST_BASE_PROBABILITY;
+    } else if (tableId == STRONG_PROBABILITY_TABLE_ID) {
+        baseProbability = STRONG_BASE_PROBABILITY;
+    } else if (tableId == AGGRESSIVE_PROBABILITY_TABLE_ID) {
+        baseProbability = AGGRESSIVE_BASE_PROBABILITY;
+    } else {
+        return NOT_A_PROBABILITY;
+    }
+    return static_cast<float>(baseProbability * (level + 1));
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
new file mode 100644
index 0000000..703cca2
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_FORGETTING_CURVE_UTILS_H
+#define LATINIME_FORGETTING_CURVE_UTILS_H
+
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/historical_info.h"
+
+namespace latinime {
+
+class HeaderPolicy;
+
+class ForgettingCurveUtils {
+ public:
+    static const HistoricalInfo createUpdatedHistoricalInfo(
+            const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
+            const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy);
+
+    static const HistoricalInfo createHistoricalInfoToSave(
+            const HistoricalInfo *const originalHistoricalInfo,
+            const HeaderPolicy *const headerPolicy);
+
+    static int decodeProbability(const HistoricalInfo *const historicalInfo,
+            const HeaderPolicy *const headerPolicy);
+
+    static int getProbability(const int encodedUnigramProbability,
+            const int encodedBigramProbability);
+
+    static bool needsToKeep(const HistoricalInfo *const historicalInfo,
+            const HeaderPolicy *const headerPolicy);
+
+    static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
+            const int bigramCount, const HeaderPolicy *const headerPolicy);
+
+    AK_FORCE_INLINE static int getUnigramCountHardLimit(const int maxUnigramCount) {
+        return static_cast<int>(static_cast<float>(maxUnigramCount)
+                * UNIGRAM_COUNT_HARD_LIMIT_WEIGHT);
+    }
+
+    AK_FORCE_INLINE static int getBigramCountHardLimit(const int maxBigramCount) {
+        return static_cast<int>(static_cast<float>(maxBigramCount)
+                * BIGRAM_COUNT_HARD_LIMIT_WEIGHT);
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
+
+    class ProbabilityTable {
+     public:
+        ProbabilityTable();
+
+        // Returns NOT_A_PROBABILITY for an unknown tableId; level and step count must be in range.
+        int getProbability(const int tableId, const int level,
+                const int elapsedTimeStepCount) const {
+            const bool isKnownId = tableId >= 0 && tableId < static_cast<int>(mTables.size());
+            return isKnownId ? mTables[tableId][level][elapsedTimeStepCount] : NOT_A_PROBABILITY;
+        }
+
+     private:
+        DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
+
+        static const int PROBABILITY_TABLE_COUNT;
+        static const int WEAK_PROBABILITY_TABLE_ID;
+        static const int MODEST_PROBABILITY_TABLE_ID;
+        static const int STRONG_PROBABILITY_TABLE_ID;
+        static const int AGGRESSIVE_PROBABILITY_TABLE_ID;
+        static const int WEAK_MAX_PROBABILITY;
+        static const int MODEST_BASE_PROBABILITY;
+        static const int STRONG_BASE_PROBABILITY;
+        static const int AGGRESSIVE_BASE_PROBABILITY;
+
+        std::vector<std::vector<std::vector<int>>> mTables;
+        static int getBaseProbabilityForLevel(const int tableId, const int level);
+    };
+
+    static const int MULTIPLIER_TWO_IN_PROBABILITY_SCALE;
+    static const int DECAY_INTERVAL_SECONDS;
+
+    static const int MAX_LEVEL;
+    static const int MIN_VISIBLE_LEVEL;
+    static const int MAX_ELAPSED_TIME_STEP_COUNT;
+    static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
+
+    static const float UNIGRAM_COUNT_HARD_LIMIT_WEIGHT;
+    static const float BIGRAM_COUNT_HARD_LIMIT_WEIGHT;
+
+    static const ProbabilityTable sProbabilityTable;
+
+    static int backoff(const int unigramProbability);
+    static int getElapsedTimeStepCount(const int timestamp, const int durationToLevelDown);
+    static int clampToVisibleEntryLevelRange(const int level);
+    static int clampToValidLevelRange(const int level);
+    static int clampToValidCountRange(const int count, const HeaderPolicy *const headerPolicy);
+    static int clampToValidTimeStepCountRange(const int timeStepCount);
+};
+} // namespace latinime
+#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.cpp
new file mode 100644
index 0000000..a206e80
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE;
+
+// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12
+const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
+
+/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
+    // Every supported version number maps to the enumerator that has the same numeric
+    // value, so a supported number can be converted directly. Any other number is
+    // reported as UNKNOWN_VERSION.
+    switch (formatVersion) {
+        case VERSION_2:
+        case VERSION_4_ONLY_FOR_TESTING:
+        case VERSION_4:
+        case VERSION_4_DEV:
+            return static_cast<FORMAT_VERSION>(formatVersion);
+        default:
+            return UNKNOWN_VERSION;
+    }
+}
+/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
+        const uint8_t *const dict, const int dictSize) {
+    // If the dictionary is smaller than the fixed header fields (DICTIONARY_MINIMUM_SIZE
+    // bytes), it cannot carry the magic number, version, flags and header size, so we don't
+    // understand this format.
+    if (dictSize < DICTIONARY_MINIMUM_SIZE) {
+        return UNKNOWN_VERSION;
+    }
+    // The magic number is stored big-endian.
+    const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
+    if (magicNumber != MAGIC_NUMBER) {
+        // Unsupported or obsolete dictionary format.
+        return UNKNOWN_VERSION;
+    }
+    // The layout of the header is as follows:
+    // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
+    // Dictionary format version number (2 bytes)
+    // Options (2 bytes)
+    // Header size (4 bytes) : integer, big endian
+    // Conceptually this converts the hardcoded value of the bytes in the file into
+    // the symbolic value we use in the code. But we want the constants to be the
+    // same so we use them for both here.
+    return getFormatVersion(ByteArrayUtils::readUint16(dict, 4));
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h
new file mode 100644
index 0000000..a4037e8
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_FORMAT_UTILS_H
+#define LATINIME_FORMAT_UTILS_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+/**
+ * Methods to handle binary dictionary format version.
+ */
+class FormatUtils {
+ public:
+ enum FORMAT_VERSION {
+ // These MUST have the same values as the relevant constants in FormatSpec.java.
+ VERSION_2 = 2,
+ VERSION_4_ONLY_FOR_TESTING = 399,
+ VERSION_4 = 402,
+ VERSION_4_DEV = 403,
+ UNKNOWN_VERSION = -1
+ };
+
+ // 32 bit magic number is stored at the beginning of the dictionary header to reject
+ // unsupported or obsolete dictionary formats.
+ static const uint32_t MAGIC_NUMBER;
+
+ static FORMAT_VERSION getFormatVersion(const int formatVersion);  // UNKNOWN_VERSION if unknown.
+ static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils);
+
+ static const int DICTIONARY_MINIMUM_SIZE;  // Bytes taken by the fixed header fields.
+};
+} // namespace latinime
+#endif /* LATINIME_FORMAT_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/historical_info.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/historical_info.h
new file mode 100644
index 0000000..80d5e26
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/historical_info.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_HISTORICAL_INFO_H
+#define LATINIME_HISTORICAL_INFO_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+// Immutable timestamp/level/count triple. ForgettingCurveUtils reads these values to decode
+// probabilities and creates new instances when an entry is updated or saved.
+class HistoricalInfo {
+ public:
+    // Invalid historical info: NOT_A_TIMESTAMP as the timestamp, level 0 and count 0.
+    HistoricalInfo()
+            : mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0) {}
+
+    HistoricalInfo(const int timestamp, const int level, const int count)
+            : mTimestamp(timestamp), mLevel(level), mCount(count) {}
+
+    // Historical info is valid exactly when it carries a timestamp.
+    bool isValid() const { return mTimestamp != NOT_A_TIMESTAMP; }
+
+    // Timestamp of this info; ForgettingCurveUtils measures the elapsed time from it.
+    int getTimeStamp() const { return mTimestamp; }
+
+    // Forgetting curve level; ForgettingCurveUtils gives level 0 no probability.
+    int getLevel() const { return mLevel; }
+
+    // Occurrences counted at the current level; ForgettingCurveUtils resets it to 0 on a
+    // level change.
+    int getCount() const { return mCount; }
+
+ private:
+    // Copy constructor is public to use this class as a type of return value.
+    DISALLOW_ASSIGNMENT_OPERATOR(HistoricalInfo);
+
+    const int mTimestamp;
+    const int mLevel;
+    const int mCount;
+};
+} // namespace latinime
+} // namespace latinime
+#endif /* LATINIME_HISTORICAL_INFO_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
new file mode 100644
index 0000000..341a849
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h"
+
+namespace latinime {
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+        const char *const path, const int bufferOffset, const int bufferSize,
+        const bool isUpdatable) {
+    const int fd = open(path, O_RDONLY);
+    if (fd < 0) {
+        AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
+        return nullptr;
+    }
+    // mmap() requires a page-aligned offset: map from the page start and skip `offset` bytes.
+    const int pagesize = sysconf(_SC_PAGESIZE);
+    const int offset = bufferOffset % pagesize;
+    const int alignedSize = bufferSize + offset;
+    const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
+    void *const mapping = mmap(0, alignedSize, protMode, MAP_PRIVATE, fd, bufferOffset - offset);
+    if (mapping == MAP_FAILED) {
+        AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
+        close(fd);
+        return nullptr;
+    }
+    uint8_t *const buffer = static_cast<uint8_t *>(mapping) + offset;
+    if (!buffer) {
+        AKLOGE("DICT: buffer is null");
+        munmap(mapping, alignedSize);  // Don't leak the mapping on this error path.
+        close(fd);
+        return nullptr;
+    }
+    return MmappedBufferPtr(new MmappedBuffer(buffer, bufferSize, mapping, alignedSize,
+            fd, isUpdatable));
+}
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+        const char *const path, const bool isUpdatable) {
+    const int fileSize = FileUtils::getFileSize(path);
+    if (fileSize == -1) {  // getFileSize() reports errors as -1.
+        return nullptr;
+    }
+    if (fileSize == 0) {  // An empty file is valid, but there is nothing to map.
+        return MmappedBufferPtr(new MmappedBuffer(isUpdatable));
+    }
+    return openBuffer(path, 0 /* bufferOffset */, fileSize, isUpdatable);
+}
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+        const char *const dirPath, const char *const fileName, const bool isUpdatable) {
+    // dirPath and fileName are concatenated as they are; no separator is inserted.
+    const int filePathBufferSize = PATH_MAX + 1 /* terminator */;
+    char filePath[filePathBufferSize];
+    const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath, fileName);
+    if (filePathLength < 0 || filePathLength >= filePathBufferSize) {  // error or truncation
+        return nullptr;
+    }
+    return openBuffer(filePath, isUpdatable);
+}
+
+MmappedBuffer::~MmappedBuffer() {
+    if (mAlignedSize == 0) {  // Buffer for an empty file: nothing was mapped.
+        return;
+    }
+    const int munmapResult = munmap(mMmappedBuffer, mAlignedSize);
+    if (munmapResult != 0) {
+        AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", munmapResult, errno);
+    }
+    const int closeResult = close(mMmapFd);
+    if (closeResult != 0) {
+        AKLOGE("DICT: Failure in close. ret=%d errno=%d", closeResult, errno);
+    }
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
new file mode 100644
index 0000000..247ea5f
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_MMAPPED_BUFFER_H
+#define LATINIME_MMAPPED_BUFFER_H
+
+#include <cstdint>
+#include <memory>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+
+namespace latinime {
+
+// Owns an mmap()ed view of (part of) a file and releases it on destruction.
+class MmappedBuffer {
+ public:
+    typedef std::unique_ptr<const MmappedBuffer> MmappedBufferPtr;
+
+    // Mmap bufferSize bytes of the file starting at bufferOffset. Returns nullptr on failure.
+    static MmappedBufferPtr openBuffer(const char *const path,
+            const int bufferOffset, const int bufferSize, const bool isUpdatable);
+
+    // Mmap entire file.
+    static MmappedBufferPtr openBuffer(const char *const path, const bool isUpdatable);
+
+    // Mmap the entire file whose path is dirPath directly followed by fileName.
+    static MmappedBufferPtr openBuffer(const char *const dirPath, const char *const fileName,
+            const bool isUpdatable);
+
+    // Unmaps the buffer and closes the underlying file descriptor.
+    ~MmappedBuffer();
+
+    ReadWriteByteArrayView getReadWriteByteArrayView() const { return mByteArrayView; }
+
+    ReadOnlyByteArrayView getReadOnlyByteArrayView() const {
+        return mByteArrayView.getReadOnlyView();
+    }
+
+    AK_FORCE_INLINE bool isUpdatable() const { return mIsUpdatable; }
+
+ private:
+    AK_FORCE_INLINE MmappedBuffer(uint8_t *const buffer, const int bufferSize,
+            void *const mmappedBuffer, const int alignedSize, const int mmapFd,
+            const bool isUpdatable)
+            : mByteArrayView(buffer, bufferSize), mMmappedBuffer(mmappedBuffer),
+              mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
+
+    // Empty file. We have to handle an empty file as a valid part of a dictionary.
+    AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable)
+            : mByteArrayView(), mMmappedBuffer(nullptr), mAlignedSize(0),
+              mMmapFd(0), mIsUpdatable(isUpdatable) {}
+
+    DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
+
+    const ReadWriteByteArrayView mByteArrayView;  // The requested part of the mapping.
+    void *const mMmappedBuffer;  // Start of the page-aligned mapping; passed to munmap().
+    const int mAlignedSize;  // Size of the mapping; 0 for an empty file.
+    const int mMmapFd;
+    const bool mIsUpdatable;
+};
+}
+#endif /* LATINIME_MMAPPED_BUFFER_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/probability_utils.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/probability_utils.h
new file mode 100644
index 0000000..f5facbf
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/probability_utils.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROBABILITY_UTILS_H
+#define LATINIME_PROBABILITY_UTILS_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+// Static helpers that derive scoring probabilities from the values stored in a dictionary.
+// TODO: Quit using bigram probability to indicate the delta.
+class ProbabilityUtils {
+ public:
+    // Returns the probability to use when backing off to a unigram. This is currently the
+    // identity function.
+    //
+    // For some reason, applying the backoff weight gives bad results in tests. To apply the
+    // backoff weight, we divide the probability by 2, which in our storing format means
+    // decreasing the score by 8.
+    // TODO: figure out what's wrong with this.
+    // return unigramProbability > 8 ?
+    //         unigramProbability - 8 : (0 == unigramProbability ? 0 : 8);
+    static AK_FORCE_INLINE int backoff(const int unigramProbability) {
+        return unigramProbability;
+    }
+
+    // Combines a unigram probability with an encoded bigram probability.
+    //
+    // We divide the range [unigramProbability..255] in 16.5 steps - in other words, we want
+    // the unigram probability to be the median value of the 17th step from the top. A value of
+    // 0 for the bigram probability represents the middle of the 16th step from the top,
+    // while a value of 15 represents the middle of the top step.
+    // See makedict.BinaryDictEncoder#makeBigramFlags for details.
+    static AK_FORCE_INLINE int computeProbabilityForBigram(
+            const int unigramProbability, const int bigramProbability) {
+        const float headroom = static_cast<float>(MAX_PROBABILITY - unigramProbability);
+        const float stepSize = headroom / (1.5f + MAX_BIGRAM_ENCODED_PROBABILITY);
+        const int delta =
+                static_cast<int>(static_cast<float>(bigramProbability + 1) * stepSize);
+        return unigramProbability + delta;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityUtils);
+};
+}
+#endif /* LATINIME_PROBABILITY_UTILS_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/sparse_table.cpp
new file mode 100644
index 0000000..c97e3ff
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/sparse_table.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/sparse_table.h"
+
+namespace latinime {
+
+// Sentinel written into index table slots and freshly created content blocks that hold no entry.
+const int SparseTable::NOT_EXIST = -1;
+// Width of one index table entry as passed to readUint()/writeUint() (presumably in bytes).
+const int SparseTable::INDEX_SIZE = 4;
+
+// Returns true when the index table holds a block index other than NOT_EXIST for |id|.
+// Negative ids and ids beyond the current tail of the index table are reported as absent.
+bool SparseTable::contains(const int id) const {
+    const int indexPos = getPosInIndexTable(id);
+    if (id < 0) {
+        return false;
+    }
+    if (indexPos >= mIndexTableBuffer->getTailPosition()) {
+        // The index table has not been extended far enough to cover this id.
+        return false;
+    }
+    const int blockIndex = mIndexTableBuffer->readUint(INDEX_SIZE, indexPos);
+    return blockIndex != NOT_EXIST;
+}
+
+// Reads the value stored for |id|. Returns NOT_A_DICT_POS when the computed content position
+// falls outside the content table or when the slot still holds NOT_EXIST. The index table
+// position itself is not range-checked here.
+uint32_t SparseTable::get(const int id) const {
+    const int blockIndex = mIndexTableBuffer->readUint(INDEX_SIZE, getPosInIndexTable(id));
+    const int contentPos = getPosInContentTable(id, blockIndex);
+    const bool isInsideContentTable =
+            contentPos >= 0 && contentPos < mContentTableBuffer->getTailPosition();
+    if (!isInsideContentTable) {
+        AKLOGE("contentTableReadingPos(%d) is invalid. id: %d, index: %d",
+                contentPos, id, blockIndex);
+        return NOT_A_DICT_POS;
+    }
+    const int storedValue = mContentTableBuffer->readUint(mDataSize, contentPos);
+    if (storedValue == NOT_EXIST) {
+        return NOT_A_DICT_POS;
+    }
+    return storedValue;
+}
+
+// Stores |value| for |id|, growing the index table and appending a new content block when
+// needed. Returns false as soon as any underlying buffer write fails.
+bool SparseTable::set(const int id, const uint32_t value) {
+    const int indexPos = getPosInIndexTable(id);
+    // Pad the index table with NOT_EXIST until it covers indexPos.
+    for (int indexTail = mIndexTableBuffer->getTailPosition(); indexTail <= indexPos;) {
+        if (!mIndexTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, INDEX_SIZE, &indexTail)) {
+            AKLOGE("cannot extend index table. tailPos: %d to: %d", indexTail, indexPos);
+            return false;
+        }
+    }
+    if (contains(id)) {
+        // A block is already assigned to this id; overwrite the slot in place.
+        const int blockIndex = mIndexTableBuffer->readUint(INDEX_SIZE, indexPos);
+        const int contentPos = getPosInContentTable(id, blockIndex);
+        if (mContentTableBuffer->writeUint(value, mDataSize, contentPos)) {
+            return true;
+        }
+        AKLOGE("cannot update value %d. pos: %d, tailPos: %d, mDataSize: %d", value,
+                contentPos, mContentTableBuffer->getTailPosition(), mDataSize);
+        return false;
+    }
+    // No block yet: the new block goes at the current tail of the content table.
+    const int newBlockIndex =
+            getIndexFromContentTablePos(mContentTableBuffer->getTailPosition());
+    if (!mIndexTableBuffer->writeUint(newBlockIndex, INDEX_SIZE, indexPos)) {
+        AKLOGE("cannot write index %d. pos %d", newBlockIndex, indexPos);
+        return false;
+    }
+    // Fill the whole block with NOT_EXIST before storing the requested value.
+    int fillPos = getPosInContentTable(0 /* id */, newBlockIndex);
+    for (int slot = 0; slot < mBlockSize; ++slot) {
+        if (!mContentTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, mDataSize, &fillPos)) {
+            AKLOGE("cannot write content table to extend. writingPos: %d, tailPos: %d, "
+                    "mDataSize: %d", fillPos, mContentTableBuffer->getTailPosition(), mDataSize);
+            return false;
+        }
+    }
+    const int valuePos = getPosInContentTable(id, newBlockIndex);
+    return mContentTableBuffer->writeUint(value, mDataSize, valuePos);
+}
+
+// Converts a content table position into the index of the block that starts there.
+int SparseTable::getIndexFromContentTablePos(const int contentTablePos) const {
+    const int slotNumber = contentTablePos / mDataSize;
+    return slotNumber / mBlockSize;
+}
+
+// Returns the index table position of the entry covering |id|; every run of mBlockSize
+// consecutive ids shares one index table entry.
+int SparseTable::getPosInIndexTable(const int id) const {
+    const int blockNumber = id / mBlockSize;
+    return blockNumber * INDEX_SIZE;
+}
+
+// Returns the content table position of |id| inside the block numbered |index|.
+int SparseTable::getPosInContentTable(const int id, const int index) const {
+    const int slotInBlock = id % mBlockSize;
+    const int slotNumber = index * mBlockSize + slotInBlock;
+    return slotNumber * mDataSize;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/sparse_table.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/sparse_table.h
new file mode 100644
index 0000000..c38e106
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/sparse_table.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SPARSE_TABLE_H
+#define LATINIME_SPARSE_TABLE_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Table that maps integer ids to fixed-width unsigned values using two buffers: an index table
+// with one entry per run of blockSize ids, and a content table made of blocks of blockSize
+// slots. Neither buffer is owned by this class; only the pointers are stored.
+// Note that there is a corresponding implementation in SparseTable.java.
+// TODO: Support multiple content buffers.
+class SparseTable {
+ public:
+ // blockSize is the number of ids sharing one index table entry; dataSize is the width passed
+ // to the content buffer when reading or writing one value.
+ SparseTable(BufferWithExtendableBuffer *const indexTableBuffer,
+ BufferWithExtendableBuffer *const contentTableBuffer, const int blockSize,
+ const int dataSize)
+ : mIndexTableBuffer(indexTableBuffer), mContentTableBuffer(contentTableBuffer),
+ mBlockSize(blockSize), mDataSize(dataSize) {}
+
+ // Returns whether the index table has a block assigned for the id.
+ bool contains(const int id) const;
+
+ // Returns the value stored for the id, or NOT_A_DICT_POS when none can be read.
+ uint32_t get(const int id) const;
+
+ // Stores the value for the id. Returns false when a buffer write fails.
+ bool set(const int id, const uint32_t value);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable);
+
+ // Position/index conversions between ids, index table entries and content table slots.
+ int getIndexFromContentTablePos(const int contentTablePos) const;
+
+ int getPosInIndexTable(const int id) const;
+
+ int getPosInContentTable(const int id, const int index) const;
+
+ // Sentinel for "no entry" and the width of one index table entry.
+ static const int NOT_EXIST;
+ static const int INDEX_SIZE;
+
+ BufferWithExtendableBuffer *const mIndexTableBuffer;
+ BufferWithExtendableBuffer *const mContentTableBuffer;
+ const int mBlockSize;
+ const int mDataSize;
+};
+} // namespace latinime
+#endif /* LATINIME_SPARSE_TABLE_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.cpp
new file mode 100644
index 0000000..10c8711
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.cpp
@@ -0,0 +1,387 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+
+namespace latinime {
+
+// Returned by lookups and allocations that do not yield a usable entry index.
+const int TrieMap::INVALID_INDEX = -1;
+// Sizes in bytes of the two fields of an entry (MAX_VALUE below multiplies them by CHAR_BIT).
+const int TrieMap::FIELD0_SIZE = 4;
+const int TrieMap::FIELD1_SIZE = 3;
+const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE;
+// FIELD_1 flag marking a terminal entry that stores its value inline, and the mask selecting
+// that inline value.
+const uint32_t TrieMap::VALUE_FLAG = 0x400000;
+const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF;
+// FIELD_1 flag marking a terminal entry that links to a value entry, and the mask selecting the
+// linked entry index.
+const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000;
+const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF;
+// Each trie level consumes 5 bits of the hashed key, so one table has at most 32 entries.
+const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5;
+const uint32_t TrieMap::LABEL_MASK = 0x1F;
+const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL = 1 << NUM_OF_BITS_USED_FOR_ONE_LEVEL;
+const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0;
+// Amount by which the default constructor extends the buffer before writing the root entry.
+// NOTE(review): presumably the space holding one empty-table link per table size -- confirm
+// against readEmptyTableLink()/writeEmptyTableLink().
+const int TrieMap::ROOT_BITMAP_ENTRY_POS = MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIELD0_SIZE;
+// Bitmap entry with no bits set and table index 0.
+const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0);
+// Largest value that fits in FIELD_0 and FIELD_1 together; put() rejects anything larger.
+const uint64_t TrieMap::MAX_VALUE =
+ (static_cast<uint64_t>(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1;
+// Buffer size limit used by the default constructor: the largest linkable entry index times
+// the entry size.
+const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE;
+
+// Creates an empty map: the buffer is extended by ROOT_BITMAP_ENTRY_POS and an empty bitmap
+// entry is then written as the root entry. The results of extend() and writeEntry() are not
+// checked here.
+TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) {
+ mBuffer.extend(ROOT_BITMAP_ENTRY_POS);
+ writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX);
+}
+
+// Wraps an existing memory region instead of starting from an empty map. Nothing is written to
+// the region here, so it must already contain a valid map.
+TrieMap::TrieMap(const ReadWriteByteArrayView buffer)
+ : mBuffer(buffer, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
+
+// Debug helper: logs the buffer tail position, the raw fields of entries in [from, to) and the
+// total number of entries sitting in the free-table lists.
+void TrieMap::dump(const int from, const int to) const {
+    AKLOGI("BufSize: %d", mBuffer.getTailPosition());
+    for (int entryIndex = from; entryIndex < to; ++entryIndex) {
+        AKLOGI("Entry[%d]: %x, %x", entryIndex, readField0(entryIndex), readField1(entryIndex));
+    }
+    int unusedRegionSize = 0;
+    for (int tableSize = 1; tableSize <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; ++tableSize) {
+        // Walk the list of freed tables of this size; every list ends at the root entry index.
+        for (int freeTableIndex = readEmptyTableLink(tableSize);
+                freeTableIndex != ROOT_BITMAP_ENTRY_INDEX;
+                freeTableIndex = readField0(freeTableIndex)) {
+            unusedRegionSize += tableSize;
+        }
+    }
+    AKLOGI("Unused Size: %d", unusedRegionSize);
+}
+
+/**
+ * Get the bitmap entry index of the next level map attached to the key, creating the next level
+ * map when the key exists but has none yet.
+ *
+ * @param key the key.
+ * @param bitmapEntryIndex the index of the bitmap entry of the map that contains the key.
+ * @return the bitmap entry index of the next level map, or INVALID_INDEX when the key is not in
+ * the map or the buffer cannot be updated.
+ */
+int TrieMap::getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex) {
+    const Entry bitmapEntry = readEntry(bitmapEntryIndex);
+    const uint32_t unsignedKey = static_cast<uint32_t>(key);
+    const int terminalEntryIndex = getTerminalEntryIndex(
+            unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */);
+    if (terminalEntryIndex == INVALID_INDEX) {
+        // Not found.
+        return INVALID_INDEX;
+    }
+    const Entry terminalEntry = readEntry(terminalEntryIndex);
+    if (terminalEntry.hasTerminalLink()) {
+        // The next level bitmap entry is stored right after the linked value entry.
+        return terminalEntry.getValueEntryIndex() + 1;
+    }
+    // Create a value entry and a bitmap entry.
+    const int valueEntryIndex = allocateTable(2 /* entryCount */);
+    if (valueEntryIndex == INVALID_INDEX) {
+        return INVALID_INDEX;
+    }
+    // Move the inline value of the terminal entry into the new value entry.
+    if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) {
+        return INVALID_INDEX;
+    }
+    if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
+        return INVALID_INDEX;
+    }
+    // Replace the inline value of the terminal entry with a link to the value entry. The link
+    // has to go into the terminal entry; writing it at valueEntryIndex would overwrite the value
+    // stored just above and leave the new level unreachable.
+    if (!writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex)) {
+        return INVALID_INDEX;
+    }
+    return valueEntryIndex + 1;
+}
+
+// Looks up |key| in the map whose bitmap entry is at |bitmapEntryIndex|. mIsValid of the
+// returned Result tells whether the key was found.
+const TrieMap::Result TrieMap::get(const int key, const int bitmapEntryIndex) const {
+    const uint32_t rawKey = static_cast<uint32_t>(key);
+    const uint32_t hashedKey = getBitShuffledKey(rawKey);
+    return getInternal(rawKey, hashedKey, bitmapEntryIndex, 0 /* level */);
+}
+
+// Inserts or updates the mapping key -> value in the map whose bitmap entry is at
+// |bitmapEntryIndex|. Values above MAX_VALUE are rejected.
+bool TrieMap::put(const int key, const uint64_t value, const int bitmapEntryIndex) {
+    const bool fitsInEntry = value <= MAX_VALUE;
+    if (!fitsInEntry) {
+        return false;
+    }
+    const uint32_t rawKey = static_cast<uint32_t>(key);
+    const uint32_t hashedKey = getBitShuffledKey(rawKey);
+    const Entry bitmapEntry = readEntry(bitmapEntryIndex);
+    return putInternal(rawKey, value, hashedKey, bitmapEntryIndex, bitmapEntry, 0 /* level */);
+}
+
+// Writes the underlying buffer to |file| through DictFileWritingUtils::writeBufferToFileTail()
+// and returns that call's result.
+bool TrieMap::save(FILE *const file) const {
+ return DictFileWritingUtils::writeBufferToFileTail(file, &mBuffer);
+}
+
+/**
+ * Advance the iteration to the next terminal entry.
+ *
+ * The iteration state is a stack of tables; the top of the stack is the table currently being
+ * scanned. Nested tables are pushed when a bitmap entry is met and popped once exhausted.
+ *
+ * @param iterationState the iteration state that will be read and updated in this method.
+ * @param outKey receives the key of the returned entry. May be null.
+ * @return Result instance. mIsValid is false when all entries are iterated.
+ */
+const TrieMap::Result TrieMap::iterateNext(std::vector<TableIterationState> *const iterationState,
+        int *const outKey) const {
+    while (!iterationState->empty()) {
+        TableIterationState &currentTable = iterationState->back();
+        if (currentTable.mCurrentIndex >= currentTable.mTableSize) {
+            // This table is exhausted; resume in the parent table.
+            iterationState->pop_back();
+            continue;
+        }
+        const int entryIndex = currentTable.mTableIndex + currentTable.mCurrentIndex;
+        ++currentTable.mCurrentIndex;
+        const Entry entry = readEntry(entryIndex);
+        if (entry.isBitmapEntry()) {
+            // Descend into the child table on the next pass. currentTable is not touched after
+            // this point because emplace_back() may reallocate the vector.
+            iterationState->emplace_back(popCount(entry.getBitmap()), entry.getTableIndex());
+            continue;
+        }
+        if (outKey) {
+            *outKey = entry.getKey();
+        }
+        if (entry.hasTerminalLink()) {
+            // The value lives in a separate value entry, followed by the next level bitmap entry.
+            const int valueEntryIndex = entry.getValueEntryIndex();
+            const Entry valueEntry = readEntry(valueEntryIndex);
+            return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
+        }
+        return Result(entry.getValue(), true, INVALID_INDEX);
+    }
+    // Visited all entries.
+    return Result(0, false, INVALID_INDEX);
+}
+
+/**
+ * Shuffle bits of the key in the fixed order.
+ *
+ * This method is used as a hash function. Every bit of the key is moved to its own position in
+ * the result, so this returns different values for different inputs.
+ */
+uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const {
+    const uint32_t everyFourthBit = 0x11111111;
+    uint32_t shuffledKey = 0;
+    for (int byteIndex = 0; byteIndex < 4; ++byteIndex) {
+        const uint32_t keyByte = (key >> (byteIndex * 8)) & 0xFF;
+        // Spread the 8 bits of this byte over the bit positions that are multiples of 4.
+        uint32_t spreadBits = keyByte;
+        spreadBits ^= keyByte << 7;
+        spreadBits ^= keyByte << 14;
+        spreadBits ^= keyByte << 21;
+        // Offset by the byte index so the four bytes interleave without overlapping.
+        shuffledKey ^= (spreadBits & everyFourthBit) << byteIndex;
+    }
+    return shuffledKey;
+}
+
+/**
+ * Write the value of a terminal entry.
+ *
+ * Values that fit in VALUE_MASK are stored inline in FIELD_1 of the terminal entry. Larger values
+ * are stored in a newly allocated value entry (followed by an empty bitmap entry) that the
+ * terminal entry links to.
+ *
+ * @param value the value.
+ * @param terminalEntryIndex the index of the terminal entry to be updated.
+ * @return whether the value has been written. The terminal entry is left untouched when the
+ * value entry cannot be allocated.
+ */
+bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) {
+    if (value <= VALUE_MASK) {
+        // Write value into the terminal entry.
+        return writeField1(value | VALUE_FLAG, terminalEntryIndex);
+    }
+    // Create value entry and write value.
+    const int valueEntryIndex = allocateTable(2 /* entryCount */);
+    if (valueEntryIndex == INVALID_INDEX) {
+        // Do not write at, or link to, an invalid index.
+        return false;
+    }
+    if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex)) {
+        return false;
+    }
+    if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
+        return false;
+    }
+    return writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex);
+}
+
+// Replaces the value of an existing terminal entry. When the entry already links to a value
+// entry, that value entry is rewritten in place; otherwise the value is written through
+// writeValue().
+bool TrieMap::updateValue(const Entry &terminalEntry, const uint64_t value,
+        const int terminalEntryIndex) {
+    if (terminalEntry.hasTerminalLink()) {
+        const Entry newValueEntry(value >> (FIELD1_SIZE * CHAR_BIT), value);
+        return writeEntry(newValueEntry, terminalEntry.getValueEntryIndex());
+    }
+    return writeValue(value, terminalEntryIndex);
+}
+
+// Pushes the table at |tableIndex| onto the list of free tables with |entryCount| entries:
+// FIELD_0 of its first entry receives the previous list head, then the table becomes the head.
+bool TrieMap::freeTable(const int tableIndex, const int entryCount) {
+    const auto previousHead = readEmptyTableLink(entryCount);
+    if (!writeField0(previousHead, tableIndex)) {
+        return false;
+    }
+    return writeEmptyTableLink(tableIndex, entryCount);
+}
+
+/**
+ * Allocate table with entryCount-entries. Reuse freed table if possible.
+ *
+ * @return the index of the first entry of the table, or INVALID_INDEX when the free list cannot
+ * be updated or the buffer cannot be extended.
+ */
+int TrieMap::allocateTable(const int entryCount) {
+    const bool hasFreeList = entryCount > 0 && entryCount <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL;
+    if (hasFreeList) {
+        const int reusableTableIndex = readEmptyTableLink(entryCount);
+        if (reusableTableIndex > 0) {
+            // Pop the head of the free list: its FIELD_0 holds the next free table.
+            return writeEmptyTableLink(readField0(reusableTableIndex), entryCount)
+                    ? reusableTableIndex : INVALID_INDEX;
+        }
+    }
+    // Allocate memory space at tail position of the buffer.
+    const int newTableIndex = getTailEntryIndex();
+    return mBuffer.extend(entryCount * ENTRY_SIZE) ? newTableIndex : INVALID_INDEX;
+}
+
+// Walks down from |bitmapEntry|, one level per recursion, and returns the index of the terminal
+// entry whose key equals |key|, or INVALID_INDEX when there is none.
+int TrieMap::getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
+        const Entry &bitmapEntry, const int level) const {
+    const uint32_t bitmap = bitmapEntry.getBitmap();
+    const int label = getLabel(hashedKey, level);
+    if (!exists(bitmap, label)) {
+        return INVALID_INDEX;
+    }
+    const int entryIndex = bitmapEntry.getTableIndex() + popCount(bitmap, label);
+    const Entry entry = readEntry(entryIndex);
+    if (!entry.isBitmapEntry()) {
+        // A terminal entry occupies this slot; it is a hit only if the keys are equal.
+        return entry.getKey() == key ? entryIndex : INVALID_INDEX;
+    }
+    // Move to the next level.
+    return getTerminalEntryIndex(key, hashedKey, entry, level + 1);
+}
+
+/**
+ * Get Result corresponding to the key.
+ *
+ * @param key the key.
+ * @param hashedKey the hashed key.
+ * @param bitmapEntryIndex the index of bitmap entry
+ * @param level current level
+ * @return Result instance corresponding to the key. mIsValid indicates whether the key is in the
+ * map. mNextLevelBitmapEntryIndex is INVALID_INDEX unless the entry links to a value entry.
+ */
+const TrieMap::Result TrieMap::getInternal(const uint32_t key, const uint32_t hashedKey,
+        const int bitmapEntryIndex, const int level) const {
+    const Entry bitmapEntry = readEntry(bitmapEntryIndex);
+    const int terminalEntryIndex = getTerminalEntryIndex(key, hashedKey, bitmapEntry, level);
+    if (terminalEntryIndex == INVALID_INDEX) {
+        // Not found.
+        return Result(0, false, INVALID_INDEX);
+    }
+    const Entry terminalEntry = readEntry(terminalEntryIndex);
+    if (terminalEntry.hasTerminalLink()) {
+        // The value is in the linked value entry; the next level bitmap entry follows it.
+        const int valueEntryIndex = terminalEntry.getValueEntryIndex();
+        const Entry valueEntry = readEntry(valueEntryIndex);
+        return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
+    }
+    return Result(terminalEntry.getValue(), true, INVALID_INDEX);
+}
+
+/**
+ * Put key to value mapping to the map.
+ *
+ * @param key the key.
+ * @param value the value
+ * @param hashedKey the hashed key.
+ * @param bitmapEntryIndex the index of bitmap entry
+ * @param bitmapEntry the bitmap entry
+ * @param level current level
+ * @return whether the key-value has been correctly inserted to the map or not.
+ */
+bool TrieMap::putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
+        const int bitmapEntryIndex, const Entry &bitmapEntry, const int level) {
+    const uint32_t bitmap = bitmapEntry.getBitmap();
+    const int tableIndex = bitmapEntry.getTableIndex();
+    const int label = getLabel(hashedKey, level);
+    if (exists(bitmap, label)) {
+        const int entryIndex = tableIndex + popCount(bitmap, label);
+        const Entry entry = readEntry(entryIndex);
+        if (entry.isBitmapEntry()) {
+            // Bitmap entry is found. Go to the next level.
+            return putInternal(key, value, hashedKey, entryIndex, entry, level + 1);
+        }
+        if (entry.getKey() != key) {
+            // Conflict with the existing key.
+            return addNewEntryByResolvingConflict(key, value, hashedKey, entry, entryIndex,
+                    level);
+        }
+        // Terminal entry for the key is found. Update the value.
+        return updateValue(entry, value, entryIndex);
+    }
+    // Current map doesn't contain the label.
+    return addNewEntryByExpandingTable(key, value, tableIndex, bitmap, bitmapEntryIndex, label);
+}
+
+/**
+ * Resolve a conflict in the current level and add new entry.
+ *
+ * The slot at conflictedEntryIndex currently holds a terminal entry for a different key. It is
+ * turned into a bitmap entry pointing at a new table. When both keys still share the label of
+ * the next level, the new table holds only the old entry and the insertion is retried one level
+ * deeper; otherwise the new table holds both entries ordered by their next level labels.
+ *
+ * @param key the key
+ * @param value the value
+ * @param hashedKey the hashed key
+ * @param conflictedEntry the existing conflicted entry
+ * @param conflictedEntryIndex the index of existing conflicted entry
+ * @param level current level
+ * @return whether the key-value has been correctly inserted to the map or not.
+ */
+bool TrieMap::addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
+ const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
+ const int level) {
+ // The stored key is re-hashed here because the entry keeps the key itself, not its hash.
+ const int conflictedKeyNextLabel =
+ getLabel(getBitShuffledKey(conflictedEntry.getKey()), level + 1);
+ const int nextLabel = getLabel(hashedKey, level + 1);
+ if (conflictedKeyNextLabel == nextLabel) {
+ // Conflicted again in the next level.
+ const int newTableIndex = allocateTable(1 /* entryCount */);
+ if (newTableIndex == INVALID_INDEX) {
+ return false;
+ }
+ if (!writeEntry(conflictedEntry, newTableIndex)) {
+ return false;
+ }
+ const Entry newBitmapEntry(setExist(0 /* bitmap */, nextLabel), newTableIndex);
+ if (!writeEntry(newBitmapEntry, conflictedEntryIndex)) {
+ return false;
+ }
+ // Retry one level deeper; the conflict is detected again there and handled recursively.
+ return putInternal(key, value, hashedKey, conflictedEntryIndex, newBitmapEntry, level + 1);
+ }
+ // The conflict has been resolved. Create a table that contains 2 entries.
+ const int newTableIndex = allocateTable(2 /* entryCount */);
+ if (newTableIndex == INVALID_INDEX) {
+ return false;
+ }
+ // Entries are placed in ascending label order; the lower slot is always written first.
+ if (nextLabel < conflictedKeyNextLabel) {
+ if (!writeTerminalEntry(key, value, newTableIndex)) {
+ return false;
+ }
+ if (!writeEntry(conflictedEntry, newTableIndex + 1)) {
+ return false;
+ }
+ } else { // nextLabel > conflictedKeyNextLabel
+ if (!writeEntry(conflictedEntry, newTableIndex)) {
+ return false;
+ }
+ if (!writeTerminalEntry(key, value, newTableIndex + 1)) {
+ return false;
+ }
+ }
+ // Finally replace the conflicted terminal entry with a bitmap entry for the new table.
+ const uint32_t updatedBitmap =
+ setExist(setExist(0 /* bitmap */, nextLabel), conflictedKeyNextLabel);
+ return writeEntry(Entry(updatedBitmap, newTableIndex), conflictedEntryIndex);
+}
+
+/**
+ * Add new entry to the existing table.
+ *
+ * A table with one more entry is allocated, the existing entries are copied around the slot of
+ * the new entry, the bitmap entry is redirected to the new table and the old table is freed.
+ */
+bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
+        const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, const int label) {
+    // Current map doesn't contain the label.
+    const int oldEntryCount = popCount(bitmap);
+    const int grownTableIndex = allocateTable(oldEntryCount + 1);
+    if (grownTableIndex == INVALID_INDEX) {
+        return false;
+    }
+    const int insertOffset = popCount(bitmap, label);
+    // Copy from existing table to the new table, leaving a gap at insertOffset.
+    for (int src = 0; src < oldEntryCount; ++src) {
+        const int dst = (src < insertOffset) ? src : src + 1;
+        if (!copyEntry(tableIndex + src, grownTableIndex + dst)) {
+            return false;
+        }
+    }
+    // Write new terminal entry.
+    if (!writeTerminalEntry(key, value, grownTableIndex + insertOffset)) {
+        return false;
+    }
+    // Update bitmap.
+    if (!writeEntry(Entry(setExist(bitmap, label), grownTableIndex), bitmapEntryIndex)) {
+        return false;
+    }
+    if (oldEntryCount == 0) {
+        // There was no old table to release.
+        return true;
+    }
+    return freeTable(tableIndex, oldEntryCount);
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.h
new file mode 100644
index 0000000..7487634
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.h
@@ -0,0 +1,384 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TRIE_MAP_H
+#define LATINIME_TRIE_MAP_H
+
+#include <climits>
+#include <cstdint>
+#include <cstdio>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/utils/byte_array_view.h"
+
+namespace latinime {
+
+/**
+ * Trie map derived from Phil Bagwell's Hash Array Mapped Trie.
+ * key is int and value is uint64_t.
+ * This supports multiple level map. Terminal entries can have a bitmap for the next level map.
+ * This doesn't support root map resizing.
+ */
+class TrieMap {
+ public:
+ // Outcome of a lookup or of one iteration step.
+ struct Result {
+ // The value found. 0 when mIsValid is false.
+ const uint64_t mValue;
+ // Whether an entry was found.
+ const bool mIsValid;
+ // Bitmap entry index of the next level map of the entry, or INVALID_INDEX when the entry
+ // has no linked value entry.
+ const int mNextLevelBitmapEntryIndex;
+
+ Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex)
+ : mValue(value), mIsValid(isValid),
+ mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {}
+ };
+
+ /**
+ * Struct to record iteration state in a table.
+ *
+ * mTableSize is the number of entries in the table, mTableIndex is the index of its first
+ * entry and mCurrentIndex is the offset of the next entry to visit (starts at 0).
+ */
+ struct TableIterationState {
+ int mTableSize;
+ int mTableIndex;
+ int mCurrentIndex;
+
+ TableIterationState(const int tableSize, const int tableIndex)
+ : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {}
+ };
+
+ class TrieMapRange;
+ // Forward iterator over the terminal entries of one level of a TrieMap. It is designed to be
+ // used through TrieMapRange in range-based for loops only.
+ class TrieMapIterator {
+ public:
+ // Snapshot of the entry the iterator currently points at.
+ class IterationResult {
+ public:
+ IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value,
+ const int nextLeveBitmapEntryIndex)
+ : mTrieMap(trieMap), mKey(key), mValue(value),
+ mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {}
+
+ // Returns a range over the next level map of this entry. Check hasNextLevelMap()
+ // first: the range is built from the stored index without any validation.
+ const TrieMapRange getEntriesInNextLevel() const {
+ return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex);
+ }
+
+ bool hasNextLevelMap() const {
+ return mNextLevelBitmapEntryIndex != INVALID_INDEX;
+ }
+
+ AK_FORCE_INLINE int key() const {
+ return mKey;
+ }
+
+ AK_FORCE_INLINE uint64_t value() const {
+ return mValue;
+ }
+
+ private:
+ const TrieMap *const mTrieMap;
+ const int mKey;
+ const uint64_t mValue;
+ const int mNextLevelBitmapEntryIndex;
+ };
+
+ // Builds an iterator positioned on the first entry of the map whose bitmap entry is at
+ // bitmapEntryIndex. A null trieMap yields an invalid iterator, which is how end() is
+ // represented.
+ TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex)
+ : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex),
+ mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) {
+ if (!trieMap) {
+ return;
+ }
+ const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex);
+ mStateStack.emplace_back(
+ mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex());
+ // Move onto the first entry (or become invalid when the map is empty).
+ this->operator++();
+ }
+
+ const IterationResult operator*() const {
+ return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex);
+ }
+
+ // Not a real inequality: this is true while either side is still valid. Because end() is
+ // always invalid, "it != end()" is true exactly while |it| is valid.
+ bool operator!=(const TrieMapIterator &other) const {
+ // Caveat: This works only for for loops.
+ return mIsValid || other.mIsValid;
+ }
+
+ // Advances to the next entry. mTrieMap is dereferenced unconditionally, so this must not
+ // be called on an iterator that was built with a null trie map.
+ const TrieMapIterator &operator++() {
+ const Result result = mTrieMap->iterateNext(&mStateStack, &mKey);
+ mValue = result.mValue;
+ mIsValid = result.mIsValid;
+ mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
+ return *this;
+ }
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator);
+ DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator);
+
+ const TrieMap *const mTrieMap;
+ // Stack of tables being scanned; updated by TrieMap::iterateNext().
+ std::vector<TrieMap::TableIterationState> mStateStack;
+ const int mBaseBitmapEntryIndex;
+ // Key, value, validity and next level index of the current entry.
+ int mKey;
+ uint64_t mValue;
+ bool mIsValid;
+ int mNextLevelBitmapEntryIndex;
+ };
+
+ /**
+ * Class to support iterating entries in TrieMap by range base for loops.
+ *
+ * The range only stores the map pointer and the bitmap entry index of the level to iterate;
+ * every call to begin() builds a fresh iterator.
+ */
+ class TrieMapRange {
+ public:
+ TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex)
+ : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {};
+
+ // Iterator positioned on the first entry of the level.
+ TrieMapIterator begin() const {
+ return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex);
+ }
+
+ // Sentinel iterator: built with a null map, so it is never valid.
+ const TrieMapIterator end() const {
+ return TrieMapIterator(nullptr, INVALID_INDEX);
+ }
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange);
+ DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange);
+
+ const TrieMap *const mTrieMap;
+ const int mBaseBitmapEntryIndex;
+ };
+
+ static const int INVALID_INDEX;
+ static const uint64_t MAX_VALUE;
+
+ TrieMap();
+ // Construct TrieMap using existing data in the memory region written by save().
+ TrieMap(const ReadWriteByteArrayView buffer);
+ void dump(const int from = 0, const int to = 0) const;
+
+ bool isNearSizeLimit() const {
+ return mBuffer.isNearSizeLimit();
+ }
+
+ int getRootBitmapEntryIndex() const {
+ return ROOT_BITMAP_ENTRY_INDEX;
+ }
+
+ // Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
+ int getNextLevelBitmapEntryIndex(const int key) {
+ return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
+ }
+
+ int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex);
+
+ const Result getRoot(const int key) const {
+ return get(key, ROOT_BITMAP_ENTRY_INDEX);
+ }
+
+ const Result get(const int key, const int bitmapEntryIndex) const;
+
+ bool putRoot(const int key, const uint64_t value) {
+ return put(key, value, ROOT_BITMAP_ENTRY_INDEX);
+ }
+
+ bool put(const int key, const uint64_t value, const int bitmapEntryIndex);
+
+ const TrieMapRange getEntriesInRootLevel() const {
+ return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX);
+ }
+
+ const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const {
+ return TrieMapRange(this, bitmapEntryIndex);
+ }
+
+ bool save(FILE *const file) const;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TrieMap);
+
+ /**
+ * Struct represents an entry.
+ *
+ * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and
+ * FIELD_1.
+ * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table.
+ * FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE)
+ * 2. terminal entry. terminal entry contains the key and value or terminal link. terminal
+ * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map
+ * for the key. Note that FIELD_0 holds the key itself, not its hash: lookups compare it with
+ * the plain key and conflict resolution hashes it again.
+ * FIELD_0(key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK))
+ * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and
+ * lower order bytes are stored in FIELD_1.
+ * FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes))
+ */
+ struct Entry {
+ const uint32_t mData0;
+ const uint32_t mData1;
+
+ Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {}
+
+ // An entry is a bitmap entry when neither of the two flag bits of FIELD_1 is set.
+ AK_FORCE_INLINE bool isBitmapEntry() const {
+ return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0;
+ }
+
+ AK_FORCE_INLINE bool hasTerminalLink() const {
+ return (mData1 & TERMINAL_LINK_FLAG) != 0;
+ }
+
+ // For terminal entry.
+ AK_FORCE_INLINE uint32_t getKey() const {
+ return mData0;
+ }
+
+ // For terminal entry.
+ // Inline value; only meaningful when the entry has no terminal link.
+ AK_FORCE_INLINE uint32_t getValue() const {
+ return mData1 & VALUE_MASK;
+ }
+
+ // For terminal entry.
+ // Index of the linked value entry; only meaningful when hasTerminalLink() is true.
+ AK_FORCE_INLINE uint32_t getValueEntryIndex() const {
+ return mData1 & TERMINAL_LINK_MASK;
+ }
+
+ // For bitmap entry.
+ AK_FORCE_INLINE uint32_t getBitmap() const {
+ return mData0;
+ }
+
+ // For bitmap entry.
+ AK_FORCE_INLINE int getTableIndex() const {
+ return static_cast<int>(mData1);
+ }
+
+ // For value entry.
+ // Recombines the value: FIELD_0 supplies the bits above the FIELD_1 width.
+ AK_FORCE_INLINE uint64_t getValueOfValueEntry() const {
+ return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1);
+ }
+ };
+
+ BufferWithExtendableBuffer mBuffer; // Storage accessed by the readField*/writeField* helpers.
+
+ static const int FIELD0_SIZE; // Size argument used when reading/writing FIELD_0.
+ static const int FIELD1_SIZE; // Size argument used when reading/writing FIELD_1.
+ static const int ENTRY_SIZE; // Distance in mBuffer between consecutive entry indices.
+ static const uint32_t VALUE_FLAG; // FIELD_1 bit; clear in bitmap entries.
+ static const uint32_t VALUE_MASK; // Applied to FIELD_1 by Entry::getValue().
+ static const uint32_t TERMINAL_LINK_FLAG; // FIELD_1 bit tested by Entry::hasTerminalLink().
+ static const uint32_t TERMINAL_LINK_MASK; // Applied to FIELD_1 by Entry::getValueEntryIndex().
+ static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL; // Per-level shift applied in getLabel().
+ static const uint32_t LABEL_MASK; // Mask applied to the shifted key in getLabel().
+ static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL;
+ static const int ROOT_BITMAP_ENTRY_INDEX;
+ static const int ROOT_BITMAP_ENTRY_POS; // Buffer position corresponding to entry index 0.
+ static const Entry EMPTY_BITMAP_ENTRY;
+ static const int MAX_BUFFER_SIZE;
+
+ uint32_t getBitShuffledKey(const uint32_t key) const;
+ bool writeValue(const uint64_t value, const int terminalEntryIndex);
+ bool updateValue(const Entry &terminalEntry, const uint64_t value,
+ const int terminalEntryIndex);
+ bool freeTable(const int tableIndex, const int entryCount);
+ int allocateTable(const int entryCount);
+ int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
+ const Entry &bitmapEntry, const int level) const;
+ const Result getInternal(const uint32_t key, const uint32_t hashedKey,
+ const int bitmapEntryIndex, const int level) const;
+ bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
+ const int bitmapEntryIndex, const Entry &bitmapEntry, const int level);
+ bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
+ const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
+ const int level);
+ bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
+ const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex,
+ const int label);
+ const Result iterateNext(std::vector<TableIterationState> *const iterationState,
+ int *const outKey) const;
+
+ AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const {
+ return Entry(readField0(entryIndex), readField1(entryIndex)); // Both fields of one entry.
+ }
+
+ // Returns whether an entry exists for the index by testing whether the index-th bit in the
+ // bitmap is set. The shifted 1 is unsigned so that index 31 never reaches a sign bit.
+ AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const {
+     return (bitmap & (static_cast<uint32_t>(1) << index)) != 0;
+ }
+
+ // Set index-th bit in the bitmap. An unsigned 1 is shifted so index 31 is well defined.
+ AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const {
+     return bitmap | (static_cast<uint32_t>(1) << index);
+ }
+
+ // Count set bits before index in the bitmap. Unsigned mask avoids int overflow at index 31.
+ AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const {
+     return popCount(bitmap & ((static_cast<uint32_t>(1) << index) - 1));
+ }
+
+ // Count set bits in the bitmap.
+ AK_FORCE_INLINE int popCount(const uint32_t bitmap) const {
+ return __builtin_popcount(bitmap); // Compiler builtin; a manual alternative is kept below.
+ // int v = bitmap - ((bitmap >> 1) & 0x55555555);
+ // v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
+ // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
+ }
+
+ AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const {
+ return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK; // level-th bit group
+ }
+
+ AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const {
+ return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); // Entry start.
+ }
+
+ AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const {
+ return mBuffer.readUint(FIELD1_SIZE,
+ ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); // FIELD_1 follows FIELD_0.
+ }
+
+ AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const {
+ return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); // Position depends only on entryCount.
+ }
+
+ AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) {
+ return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); // Same slot readEmptyTableLink() reads.
+ }
+
+ AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) {
+ return mBuffer.writeUint(data, FIELD0_SIZE,
+ ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); // Same position readField0() reads.
+ }
+
+ AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) {
+ return mBuffer.writeUint(data, FIELD1_SIZE,
+ ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); // Same position readField1() reads.
+ }
+
+ AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) {
+ return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex); // FIELD_1 is skipped if FIELD_0 fails.
+ }
+
+ AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value,
+ const int entryIndex) {
+ return writeField0(key, entryIndex) && writeValue(value, entryIndex); // Key first; value only if that succeeds.
+ }
+
+ AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) {
+ return writeEntry(readEntry(originalEntryIndex), newEntryIndex); // Read both fields, then rewrite them.
+ }
+
+ AK_FORCE_INLINE int getTailEntryIndex() const {
+ return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE; // Tail position as an entry index.
+ }
+};
+
+} // namespace latinime
+#endif /* LATINIME_TRIE_MAP_H */
diff --git a/third_party/android_prediction/suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp b/third_party/android_prediction/suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp
new file mode 100644
index 0000000..6d31739
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gesture_suggest_policy_factory.h"
+
+namespace latinime {
+ const SuggestPolicy *(*GestureSuggestPolicyFactory::sGestureSuggestFactoryMethod)() = 0; // Unset by default.
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/gesture/gesture_suggest_policy_factory.h b/third_party/android_prediction/suggest/policyimpl/gesture/gesture_suggest_policy_factory.h
new file mode 100644
index 0000000..af51949
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/gesture/gesture_suggest_policy_factory.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_GESTURE_SUGGEST_POLICY_FACTORY_H
+#define LATINIME_GESTURE_SUGGEST_POLICY_FACTORY_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class SuggestPolicy;
+
+class GestureSuggestPolicyFactory {
+ public:
+    // Registers the function later used to obtain the gesture policy; null unregisters it.
+    static void setGestureSuggestPolicyFactoryMethod(const SuggestPolicy *(*factoryMethod)()) {
+        sGestureSuggestFactoryMethod = factoryMethod;
+    }
+
+    // Returns the registered factory's result, or null when no factory is registered.
+    static const SuggestPolicy *getGestureSuggestPolicy() {
+        return sGestureSuggestFactoryMethod ? sGestureSuggestFactoryMethod() : nullptr;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(GestureSuggestPolicyFactory);
+    // Assigned only through setGestureSuggestPolicyFactoryMethod().
+    static const SuggestPolicy *(*sGestureSuggestFactoryMethod)();
+};
+} // namespace latinime
+#endif // LATINIME_GESTURE_SUGGEST_POLICY_FACTORY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/typing/scoring_params.cpp b/third_party/android_prediction/suggest/policyimpl/typing/scoring_params.cpp
new file mode 100644
index 0000000..92e5a9f
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/typing/scoring_params.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/typing/scoring_params.h"
+
+namespace latinime {
+// TODO: RENAME all
+const float ScoringParams::MAX_SPATIAL_DISTANCE = 1.0f;
+const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY = 40;
+const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED = 120;
+const float ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f;
+
+const float ScoringParams::EXACT_MATCH_PROMOTION = 1.1f; // Larger than the three penalties below combined.
+const float ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH = 0.01f;
+const float ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH = 0.02f;
+const float ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH = 0.03f;
+
+// TODO: Unlimit max cache dic node size
+const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 170; // TypingTraversal::getMaxCacheSize() for inputSize > 1.
+const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT = 310; // ... and for inputSize <= 1.
+const int ScoringParams::THRESHOLD_SHORT_WORD_LENGTH = 4;
+
+const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.1524f;
+const float ScoringParams::PROXIMITY_COST = 0.0694f;
+const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.072f;
+const float ScoringParams::FIRST_PROXIMITY_COST = 0.07788f;
+const float ScoringParams::INTENTIONAL_OMISSION_COST = 0.1f;
+const float ScoringParams::OMISSION_COST = 0.467f;
+const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.345f;
+const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.5256f;
+const float ScoringParams::INSERTION_COST = 0.7248f;
+const float ScoringParams::TERMINAL_INSERTION_COST = 0.8128f;
+const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.5508f;
+const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.674f;
+const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.639f;
+const float ScoringParams::TRANSPOSITION_COST = 0.5608f;
+const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.334f;
+const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.4576f;
+const float ScoringParams::SUBSTITUTION_COST = 0.3806f;
+const float ScoringParams::COST_NEW_WORD = 0.0314f;
+const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.3224f;
+const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.1214f;
+const float ScoringParams::COST_FIRST_COMPLETION = 0.4836f;
+const float ScoringParams::COST_COMPLETION = 0.00624f;
+const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.0683f;
+const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.0362f;
+const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.4182f;
+const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f; // Starting score in TypingScoring::calculateFinalScore().
+const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
+const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.095f;
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/typing/scoring_params.h b/third_party/android_prediction/suggest/policyimpl/typing/scoring_params.h
new file mode 100644
index 0000000..c40038b
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/typing/scoring_params.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SCORING_PARAMS_H
+#define LATINIME_SCORING_PARAMS_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class ScoringParams {
+ public:
+ // Fixed model parameters
+ static const float MAX_SPATIAL_DISTANCE; // Returned by TypingTraversal::getMaxSpatialDistance().
+ static const int THRESHOLD_NEXT_WORD_PROBABILITY; // Floor checked in isGoodToTraverseNextWord().
+ static const int THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED; // Floor for short capitalized words.
+ static const float AUTOCORRECT_OUTPUT_THRESHOLD; // Added to the score when forceCommit is set.
+ static const int MAX_CACHE_DIC_NODE_SIZE;
+ static const int MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT;
+ static const int THRESHOLD_SHORT_WORD_LENGTH; // Code point counts below this are "short".
+
+ static const float EXACT_MATCH_PROMOTION; // Added for boosted exact matches.
+ static const float CASE_ERROR_PENALTY_FOR_EXACT_MATCH; // Subtracted on MATCH_WITH_CASE_ERROR.
+ static const float ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH; // Subtracted on MATCH_WITH_ACCENT_ERROR.
+ static const float DIGRAPH_PENALTY_FOR_EXACT_MATCH; // Subtracted on MATCH_WITH_DIGRAPH.
+
+ // Numerically optimized parameters (currently for tap typing only).
+ // TODO: add ability to modify these constants programmatically.
+ // TODO: explore optimization of gesture parameters.
+ static const float DISTANCE_WEIGHT_LENGTH;
+ static const float PROXIMITY_COST;
+ static const float FIRST_CHAR_PROXIMITY_COST;
+ static const float FIRST_PROXIMITY_COST;
+ static const float INTENTIONAL_OMISSION_COST;
+ static const float OMISSION_COST;
+ static const float OMISSION_COST_SAME_CHAR;
+ static const float OMISSION_COST_FIRST_CHAR;
+ static const float INSERTION_COST;
+ static const float TERMINAL_INSERTION_COST;
+ static const float INSERTION_COST_SAME_CHAR;
+ static const float INSERTION_COST_PROXIMITY_CHAR;
+ static const float INSERTION_COST_FIRST_CHAR;
+ static const float TRANSPOSITION_COST;
+ static const float SPACE_SUBSTITUTION_COST;
+ static const float ADDITIONAL_PROXIMITY_COST;
+ static const float SUBSTITUTION_COST;
+ static const float COST_NEW_WORD;
+ static const float COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE;
+ static const float DISTANCE_WEIGHT_LANGUAGE; // Constant term of maxDistance in calculateFinalScore().
+ static const float COST_FIRST_COMPLETION;
+ static const float COST_COMPLETION;
+ static const float HAS_PROXIMITY_TERMINAL_COST;
+ static const float HAS_EDIT_CORRECTION_TERMINAL_COST;
+ static const float HAS_MULTI_WORD_TERMINAL_COST;
+ static const float TYPING_BASE_OUTPUT_SCORE; // Starting score in calculateFinalScore().
+ static const float TYPING_MAX_OUTPUT_SCORE_PER_INPUT; // Per-input term of maxDistance.
+ static const float NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT; // Used by allowsErrorCorrections().
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ScoringParams);
+};
+} // namespace latinime
+#endif // LATINIME_SCORING_PARAMS_H
diff --git a/third_party/android_prediction/suggest/policyimpl/typing/typing_scoring.cpp b/third_party/android_prediction/suggest/policyimpl/typing/typing_scoring.cpp
new file mode 100644
index 0000000..281ae02
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/typing/typing_scoring.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/typing/typing_scoring.h"
+
+namespace latinime {
+const TypingScoring TypingScoring::sInstance; // Instance handed out by TypingScoring::getInstance().
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/typing/typing_scoring.h b/third_party/android_prediction/suggest/policyimpl/typing/typing_scoring.h
new file mode 100644
index 0000000..1251011
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/typing/typing_scoring.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TYPING_SCORING_H
+#define LATINIME_TYPING_SCORING_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dictionary/error_type_utils.h"
+#include "third_party/android_prediction/suggest/core/policy/scoring.h"
+#include "third_party/android_prediction/suggest/core/session/dic_traverse_session.h"
+#include "third_party/android_prediction/suggest/policyimpl/typing/scoring_params.h"
+
+namespace latinime {
+
+class DicNode;
+class DicTraverseSession;
+
+class TypingScoring : public Scoring {
+ public:
+ static const TypingScoring *getInstance() { return &sInstance; } // Address of the static member.
+
+ AK_FORCE_INLINE void getMostProbableString(const DicTraverseSession *const traverseSession,
+ const float languageWeight, SuggestionResults *const outSuggestionResults) const {} // No-op here.
+
+ AK_FORCE_INLINE float getAdjustedLanguageWeight(DicTraverseSession *const traverseSession,
+ DicNode *const terminals, const int size) const {
+ return 1.0f; // Constant; none of the arguments are read.
+ }
+
+ AK_FORCE_INLINE int calculateFinalScore(const float compoundDistance, const int inputSize,
+ const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit,
+ const bool boostExactMatches) const {
+ const float maxDistance = ScoringParams::DISTANCE_WEIGHT_LANGUAGE
+ + static_cast<float>(inputSize) * ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT;
+ float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE - compoundDistance / maxDistance; // Base minus normalized distance.
+ if (forceCommit) {
+ score += ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD;
+ }
+ if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) {
+ score += ScoringParams::EXACT_MATCH_PROMOTION;
+ if ((ErrorTypeUtils::MATCH_WITH_CASE_ERROR & containedErrorTypes) != 0) { // Penalties below can stack.
+ score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
+ }
+ if ((ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR & containedErrorTypes) != 0) {
+ score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
+ }
+ if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) {
+ score -= ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH;
+ }
+ }
+ return static_cast<int>(score * SUGGEST_INTERFACE_OUTPUT_SCALE); // Scaled, then truncated to int.
+ }
+
+ AK_FORCE_INLINE float getDoubleLetterDemotionDistanceCost(
+ const DicNode *const terminalDicNode) const {
+ return 0.0f; // Constant; terminalDicNode is not read.
+ }
+
+ AK_FORCE_INLINE bool autoCorrectsToMultiWordSuggestionIfTop() const {
+ return true;
+ }
+
+ AK_FORCE_INLINE bool sameAsTyped(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode) const {
+ return traverseSession->getProximityInfoState(0)->sameAsTyped( // Delegates to proximity info state 0.
+ dicNode->getOutputWordBuf(), dicNode->getNodeCodePointCount());
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TypingScoring);
+ static const TypingScoring sInstance;
+
+ TypingScoring() {} // Private: only the static sInstance member is constructed in this class.
+ ~TypingScoring() {}
+};
+} // namespace latinime
+#endif // LATINIME_TYPING_SCORING_H
diff --git a/third_party/android_prediction/suggest/policyimpl/typing/typing_suggest_policy.cpp b/third_party/android_prediction/suggest/policyimpl/typing/typing_suggest_policy.cpp
new file mode 100644
index 0000000..94cdb89
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/typing/typing_suggest_policy.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/typing/typing_suggest_policy.h"
+
+namespace latinime {
+const TypingSuggestPolicy TypingSuggestPolicy::sInstance; // Instance handed out by getInstance().
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/typing/typing_suggest_policy.h b/third_party/android_prediction/suggest/policyimpl/typing/typing_suggest_policy.h
new file mode 100644
index 0000000..d9c120f
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/typing/typing_suggest_policy.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TYPING_SUGGEST_POLICY_H
+#define LATINIME_TYPING_SUGGEST_POLICY_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/policy/suggest_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/typing/typing_scoring.h"
+#include "third_party/android_prediction/suggest/policyimpl/typing/typing_traversal.h"
+#include "third_party/android_prediction/suggest/policyimpl/typing/typing_weighting.h"
+
+namespace latinime {
+
+class Scoring;
+class Traversal;
+class Weighting;
+
+class TypingSuggestPolicy : public SuggestPolicy {
+ public:
+    // Returns the address of the static sInstance member.
+    static const TypingSuggestPolicy *getInstance() { return &sInstance; }
+
+    TypingSuggestPolicy() {}
+    virtual ~TypingSuggestPolicy() {}
+
+    // Traversal component: the TypingTraversal instance.
+    AK_FORCE_INLINE const Traversal *getTraversal() const { return TypingTraversal::getInstance(); }
+
+    // Scoring component: the TypingScoring instance.
+    AK_FORCE_INLINE const Scoring *getScoring() const { return TypingScoring::getInstance(); }
+
+    // Weighting component: the TypingWeighting instance.
+    AK_FORCE_INLINE const Weighting *getWeighting() const { return TypingWeighting::getInstance(); }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(TypingSuggestPolicy);
+    // Backing object for getInstance().
+    static const TypingSuggestPolicy sInstance;
+};
+} // namespace latinime
+#endif // LATINIME_TYPING_SUGGEST_POLICY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/typing/typing_suggest_policy_factory.h b/third_party/android_prediction/suggest/policyimpl/typing/typing_suggest_policy_factory.h
new file mode 100644
index 0000000..9c3c752
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/typing/typing_suggest_policy_factory.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TYPING_SUGGEST_POLICY_FACTORY_H
+#define LATINIME_TYPING_SUGGEST_POLICY_FACTORY_H
+
+#include "third_party/android_prediction/defines.h"
+#include "typing_suggest_policy.h"
+
+namespace latinime {
+
+class SuggestPolicy;
+
+class TypingSuggestPolicyFactory {
+ public:
+ static const SuggestPolicy *getTypingSuggestPolicy() {
+ return TypingSuggestPolicy::getInstance(); // Exposed through the SuggestPolicy base type.
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TypingSuggestPolicyFactory);
+};
+} // namespace latinime
+#endif // LATINIME_TYPING_SUGGEST_POLICY_FACTORY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/typing/typing_traversal.cpp b/third_party/android_prediction/suggest/policyimpl/typing/typing_traversal.cpp
new file mode 100644
index 0000000..600e97b
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/typing/typing_traversal.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/typing/typing_traversal.h"
+
+namespace latinime {
+const bool TypingTraversal::CORRECT_OMISSION = true; // When false, isOmission() always returns false.
+const bool TypingTraversal::CORRECT_NEW_WORD_SPACE_SUBSTITUTION = true; // Gates isSpaceSubstitutionTerminal().
+const bool TypingTraversal::CORRECT_NEW_WORD_SPACE_OMISSION = true; // Gates isSpaceOmissionTerminal().
+const TypingTraversal TypingTraversal::sInstance; // Instance handed out by getInstance().
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/typing/typing_traversal.h b/third_party/android_prediction/suggest/policyimpl/typing/typing_traversal.h
new file mode 100644
index 0000000..e0a4886
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/typing/typing_traversal.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TYPING_TRAVERSAL_H
+#define LATINIME_TYPING_TRAVERSAL_H
+
+#include <cstdint>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_vector.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_state.h"
+#include "third_party/android_prediction/suggest/core/layout/proximity_info_utils.h"
+#include "third_party/android_prediction/suggest/core/policy/traversal.h"
+#include "third_party/android_prediction/suggest/core/session/dic_traverse_session.h"
+#include "third_party/android_prediction/suggest/policyimpl/typing/scoring_params.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+namespace latinime {
+class TypingTraversal : public Traversal {
+ public:
+    static const TypingTraversal *getInstance() { return &sInstance; }
+
+    // Pointer count limit for this traversal, taken from MAX_POINTER_COUNT.
+    AK_FORCE_INLINE int getMaxPointerCount() const { return MAX_POINTER_COUNT; }
+
+    // Error corrections are allowed only while the node's normalized spatial distance is
+    // strictly below NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT.
+    AK_FORCE_INLINE bool allowsErrorCorrections(const DicNode *const dicNode) const {
+        return ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT
+                > dicNode->getNormalizedSpatialDistance();
+    }
+
+    // Omission check for the step from dicNode to childDicNode. Gives up when omission
+    // correction is off, when the omission cannot be considered, or when dicNode is a
+    // completion; otherwise accepts if dicNode can be an intentional omission or if the child's
+    // base lower case code point differs from the typed one at dicNode's first input index.
+    AK_FORCE_INLINE bool isOmission(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode, const DicNode *const childDicNode,
+            const bool allowsErrorCorrections) const {
+        // Note: Always consider intentional omissions (like apostrophes) since they are common.
+        if (!CORRECT_OMISSION
+                || !(allowsErrorCorrections || childDicNode->canBeIntentionalOmission())) {
+            return false;
+        }
+        const int typedLength = traverseSession->getInputSize();
+        // TODO: Don't refer to isCompletion?
+        if (dicNode->isCompletion(typedLength)) {
+            return false;
+        }
+        if (dicNode->canBeIntentionalOmission()) {
+            return true;
+        }
+        const int firstInputIndex = dicNode->getInputIndex(0);
+        const int childBaseLower = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
+        const int typedBaseLower = CharUtils::toBaseLowerCase(
+                traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(firstInputIndex));
+        return childBaseLower != typedBaseLower;
+    }
+
+    // Terminal node with look-ahead correction available and space proximity at its input.
+    AK_FORCE_INLINE bool isSpaceSubstitutionTerminal(
+            const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const {
+        if (!CORRECT_NEW_WORD_SPACE_SUBSTITUTION
+                || !canDoLookAheadCorrection(traverseSession, dicNode)) {
+            return false;
+        }
+        const int firstInputIndex = dicNode->getInputIndex(0);
+        if (!dicNode->isTerminalDicNode()) {
+            return false;
+        }
+        return traverseSession->getProximityInfoState(0)->hasSpaceProximity(firstInputIndex);
+    }
+
+    // Accepts a terminal, non-completion node whose first input index does not exceed the
+    // input size, provided the node neither exceeds the total input size limit nor is
+    // filtered by the bigram safety net. Always false when space omission is disabled.
+    AK_FORCE_INLINE bool isSpaceOmissionTerminal(
+            const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const {
+        if (!CORRECT_NEW_WORD_SPACE_OMISSION) {
+            return false;
+        }
+        const int typedLength = traverseSession->getInputSize();
+        // TODO: Don't refer to isCompletion?
+        if (dicNode->isCompletion(typedLength) || !dicNode->isTerminalDicNode()) {
+            return false;
+        }
+        const int16_t firstInputIndex = dicNode->getInputIndex(0);
+        return firstInputIndex <= typedLength && !dicNode->isTotalInputSizeExceedingLimit()
+                && !dicNode->shouldBeFilteredBySafetyNetForBigram();
+    }
+
+    // Delegates to the session's isCacheBorderForTyping() with the current input size.
+    AK_FORCE_INLINE bool shouldDepthLevelCache(
+            const DicTraverseSession *const traverseSession) const {
+        return traverseSession->isCacheBorderForTyping(traverseSession->getInputSize());
+    }
+
+    AK_FORCE_INLINE bool shouldNodeLevelCache(
+            const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const {
+        return false;  // Constant; neither argument is read.
+    }
+
+    // Forwards to DicNode::canDoLookAheadCorrection() with the session's input size.
+    AK_FORCE_INLINE bool canDoLookAheadCorrection(
+            const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const {
+        return dicNode->canDoLookAheadCorrection(traverseSession->getInputSize());
+    }
+
+    AK_FORCE_INLINE ProximityType getProximityType(
+            const DicTraverseSession *const traverseSession, const DicNode *const dicNode,
+            const DicNode *const childDicNode) const {
+        const ProximityInfoState *const pInfoState = traverseSession->getProximityInfoState(0);
+        return pInfoState->getProximityType(dicNode->getInputIndex(0),
+                childDicNode->getNodeCodePoint(), true /* checkProximityChars */);
+    }
+
+    // Always true for typing.
+    AK_FORCE_INLINE bool needsToTraverseAllUserInput() const { return true; }
+
+    AK_FORCE_INLINE float getMaxSpatialDistance() const {
+        return ScoringParams::MAX_SPATIAL_DISTANCE;
+    }
+
+    AK_FORCE_INLINE int getDefaultExpandDicNodeSize() const {
+        return DicNodeVector::DEFAULT_NODES_SIZE_FOR_OPTIMIZATION;
+    }
+
+    // Uses the single-point cache size when inputSize is at most 1.
+    AK_FORCE_INLINE int getMaxCacheSize(const int inputSize) const {
+        if (inputSize > 1) {
+            return ScoringParams::MAX_CACHE_DIC_NODE_SIZE;
+        }
+        return ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT;
+    }
+
+    // Terminal cache size, taken from MAX_RESULTS.
+    AK_FORCE_INLINE int getTerminalCacheSize() const { return MAX_RESULTS; }
+
+    // True when dicNode's code point is a match or proximity char relative to parentDicNode.
+    AK_FORCE_INLINE bool isPossibleOmissionChildNode(
+            const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode,
+            const DicNode *const dicNode) const {
+        return ProximityInfoUtils::isMatchOrProximityChar(
+                getProximityType(traverseSession, parentDicNode, dicNode));
+    }
+
+    AK_FORCE_INLINE bool isGoodToTraverseNextWord(const DicNode *const dicNode) const {
+        const int wordProbability = dicNode->getProbability();
+        if (wordProbability < ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY) {
+            return false;
+        }
+        const bool isShort = dicNode->getNodeCodePointCount()
+                < ScoringParams::THRESHOLD_SHORT_WORD_LENGTH;
+        if (isShort && dicNode->isFirstCharUppercase()) {
+            // Short capitalized words must clear the higher threshold.
+            return wordProbability >= ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED;
+        }
+        return true;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(TypingTraversal);
+    static const bool CORRECT_OMISSION;
+    static const bool CORRECT_NEW_WORD_SPACE_SUBSTITUTION;
+    static const bool CORRECT_NEW_WORD_SPACE_OMISSION;
+    static const TypingTraversal sInstance;
+
+    TypingTraversal() {}
+    ~TypingTraversal() {}
+};
+} // namespace latinime
+#endif // LATINIME_TYPING_TRAVERSAL_H
diff --git a/third_party/android_prediction/suggest/policyimpl/typing/typing_weighting.cpp b/third_party/android_prediction/suggest/policyimpl/typing/typing_weighting.cpp
new file mode 100644
index 0000000..b865134
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/typing/typing_weighting.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/typing/typing_weighting.h"
+
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
+#include "third_party/android_prediction/suggest/policyimpl/typing/scoring_params.h"
+
+namespace latinime {
+
+const TypingWeighting TypingWeighting::sInstance;
+
+// Classifies the correction applied between parentDicNode and dicNode as an
+// ErrorTypeUtils::ErrorType. Correction types without a dedicated case are NOT_AN_ERROR.
+ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
+        const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode,
+        const DicNode *const dicNode) const {
+    switch (correctionType) {
+        case CT_MATCH: {
+            if (isProximityDicNode(traverseSession, dicNode)) {
+                return ErrorTypeUtils::PROXIMITY_CORRECTION;
+            }
+            if (dicNode->isInDigraph()) {
+                return ErrorTypeUtils::MATCH_WITH_DIGRAPH;
+            }
+            // Compare the node code point with the code point originally typed on the keyboard.
+            const ProximityInfoState *const proximityState =
+                    traverseSession->getProximityInfoState(0);
+            const int typedCodePoint =
+                    proximityState->getPrimaryOriginalCodePointAt(dicNode->getInputIndex(0));
+            const int dictCodePoint = dicNode->getNodeCodePoint();
+            if (typedCodePoint == dictCodePoint) {
+                // The node holds exactly the typed code point.
+                return ErrorTypeUtils::NOT_AN_ERROR;
+            }
+            if (CharUtils::toLowerCase(typedCodePoint)
+                    == CharUtils::toLowerCase(dictCodePoint)) {
+                // The two code points differ only in case.
+                return ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+            }
+            if (CharUtils::toBaseCodePoint(typedCodePoint)
+                    == CharUtils::toBaseCodePoint(dictCodePoint)) {
+                // Same base code point, so the node is an accent variant of the typed one.
+                return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR;
+            }
+            // None of the above: flag both an accent and a case difference.
+            return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR
+                    | ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+        }
+        case CT_ADDITIONAL_PROXIMITY:
+            return ErrorTypeUtils::PROXIMITY_CORRECTION;
+        case CT_OMISSION:
+            if (parentDicNode->canBeIntentionalOmission()) {
+                return ErrorTypeUtils::INTENTIONAL_OMISSION;
+            }
+            return ErrorTypeUtils::EDIT_CORRECTION;
+        case CT_SUBSTITUTION:
+        case CT_INSERTION:
+        case CT_TERMINAL_INSERTION:
+        case CT_TRANSPOSITION:
+            return ErrorTypeUtils::EDIT_CORRECTION;
+        case CT_NEW_WORD_SPACE_OMISSION:
+        case CT_NEW_WORD_SPACE_SUBSTITUTION:
+            return ErrorTypeUtils::NEW_WORD;
+        case CT_TERMINAL:
+            return ErrorTypeUtils::NOT_AN_ERROR;
+        case CT_COMPLETION:
+            return ErrorTypeUtils::COMPLETION;
+        default:
+            return ErrorTypeUtils::NOT_AN_ERROR;
+    }
+}
+} // namespace latinime
diff --git a/third_party/android_prediction/suggest/policyimpl/typing/typing_weighting.h b/third_party/android_prediction/suggest/policyimpl/typing/typing_weighting.h
new file mode 100644
index 0000000..f432444
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/typing/typing_weighting.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TYPING_WEIGHTING_H
+#define LATINIME_TYPING_WEIGHTING_H
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/core/dicnode/dic_node_utils.h"
+#include "third_party/android_prediction/suggest/core/dictionary/error_type_utils.h"
+#include "third_party/android_prediction/suggest/core/layout/touch_position_correction_utils.h"
+#include "third_party/android_prediction/suggest/core/policy/weighting.h"
+#include "third_party/android_prediction/suggest/core/session/dic_traverse_session.h"
+#include "third_party/android_prediction/suggest/policyimpl/typing/scoring_params.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+namespace latinime {
+
+class DicNode;
+struct DicNode_InputStateG;
+class MultiBigramMap;
+
+// Weighting policy for typing input, exposed through a single shared instance.
+class TypingWeighting : public Weighting {
+ public:
+    static const TypingWeighting *getInstance() { return &sInstance; }
+
+ protected:
+    // Sums the terminal costs that apply to multi-word, proximity-corrected and
+    // edit-corrected nodes.
+    float getTerminalSpatialCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode) const {
+        const bool isMultiWord = dicNode->hasMultipleWords();
+        const bool usedProximity = dicNode->getProximityCorrectionCount() > 0;
+        const bool usedEdit = dicNode->getEditCorrectionCount() > 0;
+        float total = 0.0f;
+        if (isMultiWord) { total += ScoringParams::HAS_MULTI_WORD_TERMINAL_COST; }
+        if (usedProximity) { total += ScoringParams::HAS_PROXIMITY_TERMINAL_COST; }
+        if (usedEdit) { total += ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST; }
+        return total;
+    }
+
+    // Cost of an omission correction, chosen in priority order: zero-cost, intentional,
+    // first-letter, then generic (distinguishing a node that repeats its parent's code point).
+    float getOmissionCost(const DicNode *const parentDicNode, const DicNode *const dicNode) const {
+        const bool zeroCost = parentDicNode->isZeroCostOmission();
+        const bool intentional = parentDicNode->canBeIntentionalOmission();
+        const bool repeatsParent = dicNode->isSameNodeCodePoint(parentDicNode);
+        // If the traversal omitted the first letter then the dicNode should now be on the second.
+        const bool firstLetterOmitted = dicNode->getNodeCodePointCount() == 2;
+        if (zeroCost) {
+            return 0.0f;
+        } else if (intentional) {
+            return ScoringParams::INTENTIONAL_OMISSION_COST;
+        } else if (firstLetterOmitted) {
+            return ScoringParams::OMISSION_COST_FIRST_CHAR;
+        } else if (repeatsParent) {
+            return ScoringParams::OMISSION_COST_SAME_CHAR;
+        }
+        return ScoringParams::OMISSION_COST;
+    }
+
+    // Weighted key distance at dicNode's input index plus proximity and capitalization costs.
+    float getMatchedCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode, DicNode_InputStateG *inputStateG) const {
+        const int inputIndex = dicNode->getInputIndex(0);
+        const int baseLowerCodePoint = CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint());
+        const float squaredLength = traverseSession->getProximityInfoState(0)
+                ->getPointToKeyLength(inputIndex, baseLowerCodePoint);
+        const float sweetSpotDistance = TouchPositionCorrectionUtils::getSweetSpotFactor(
+                traverseSession->isTouchPositionCorrectionEnabled(), squaredLength);
+        const float distanceCost = ScoringParams::DISTANCE_WEIGHT_LENGTH * sweetSpotDistance;
+
+        float penalty = 0.0f;
+        if (isProximityDicNode(traverseSession, dicNode)) {
+            if (inputIndex == 0) {
+                penalty = ScoringParams::FIRST_CHAR_PROXIMITY_COST;
+            } else {
+                penalty = ScoringParams::PROXIMITY_COST;
+            }
+            if (dicNode->getProximityCorrectionCount() == 0) {
+                penalty += ScoringParams::FIRST_PROXIMITY_COST;
+            }
+        }
+        // Demote an uppercase-initial second-or-later word once it reaches its second character.
+        if (dicNode->getNodeCodePointCount() == 2 && dicNode->hasMultipleWords()
+                && dicNode->isFirstCharUppercase()) {
+            penalty += ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE;
+        }
+        return distanceCost + penalty;
+    }
+
+    // True when dicNode and the primary code point at its input index differ in base lower case.
+    bool isProximityDicNode(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode) const {
+        const int typedCodePoint = traverseSession->getProximityInfoState(0)
+                ->getPrimaryCodePointAt(dicNode->getInputIndex(0));
+        return CharUtils::toBaseLowerCase(typedCodePoint)
+                != CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint());
+    }
+
+    // Fixed transposition cost plus the weighted key distances measured with the parent's and
+    // this node's code points swapped across the two touch points.
+    float getTranspositionCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const parentDicNode, const DicNode *const dicNode) const {
+        const ProximityInfoState *const state = traverseSession->getProximityInfoState(0);
+        const int16_t firstIndex = parentDicNode->getInputIndex(0);
+        const float parentKeyLength = state->getPointToKeyLength(
+                firstIndex + 1, CharUtils::toBaseLowerCase(parentDicNode->getNodeCodePoint()));
+        const float nodeKeyLength = state->getPointToKeyLength(
+                firstIndex, CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint()));
+        const float summedLength = parentKeyLength + nodeKeyLength;
+        const float weightedLength = summedLength * ScoringParams::DISTANCE_WEIGHT_LENGTH;
+        return ScoringParams::TRANSPOSITION_COST + weightedLength;
+    }
+
+    // Cost of an insertion correction located at the parent's input index.
+    float getInsertionCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const parentDicNode, const DicNode *const dicNode) const {
+        const ProximityInfoState *const state = traverseSession->getProximityInfoState(0);
+        const int16_t insertedIndex = parentDicNode->getInputIndex(0);
+        const int nodeCodePoint = dicNode->getNodeCodePoint();
+        const bool repeatsTypedKey = state->getPrimaryCodePointAt(insertedIndex) == nodeCodePoint;
+        const bool hasAdjacentProximity = state->existsAdjacentProximityChars(insertedIndex);
+        const float keyLength = state->getPointToKeyLength(
+                insertedIndex + 1, CharUtils::toBaseLowerCase(nodeCodePoint));
+        const float distanceCost = keyLength * ScoringParams::DISTANCE_WEIGHT_LENGTH;
+        // A node holding a single code point starts from INSERTION_COST_FIRST_CHAR, others from 0.
+        const bool isSingleCodePoint = dicNode->getNodeCodePointCount() == 1;
+        float cost = isSingleCodePoint ? ScoringParams::INSERTION_COST_FIRST_CHAR : 0.0f;
+        if (repeatsTypedKey) {
+            cost += ScoringParams::INSERTION_COST_SAME_CHAR;
+        } else if (hasAdjacentProximity) {
+            cost += ScoringParams::INSERTION_COST_PROXIMITY_CHAR;
+        } else {
+            cost += ScoringParams::INSERTION_COST;
+        }
+        return cost + distanceCost;
+    }
+
+    float getNewWordSpatialCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode, DicNode_InputStateG *inputStateG) const {
+        return traverseSession->getMultiWordCostMultiplier() * ScoringParams::COST_NEW_WORD;
+    }
+
+    // Bigram improbability of dicNode, scaled by the language distance weight.
+    float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) const {
+        const float improbability = DicNodeUtils::getBigramNodeImprobability(
+                traverseSession->getDictionaryStructurePolicy(), dicNode, multiBigramMap);
+        return improbability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
+    }
+
+    // Completion cost, with a dedicated constant for the first completed character.
+    float getCompletionCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode) const {
+        // The auto completion starts when the input index is same as the input size
+        // TODO: Change the cost for the first completion for the gesture?
+        if (dicNode->getInputIndex(0) == traverseSession->getInputSize()) {
+            return ScoringParams::COST_FIRST_COMPLETION;
+        }
+        return ScoringParams::COST_COMPLETION;
+    }
+
+    float getTerminalLanguageCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode, const float dicNodeLanguageImprobability) const {
+        return ScoringParams::DISTANCE_WEIGHT_LANGUAGE * dicNodeLanguageImprobability;
+    }
+
+    float getTerminalInsertionCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode) const {
+        const int inputIndex = dicNode->getInputIndex(0);
+        const int inputSize = traverseSession->getInputSize();
+        ASSERT(inputIndex < inputSize);
+        // TODO: Implement more efficient logic
+        return ScoringParams::TERMINAL_INSERTION_COST * (inputSize - inputIndex);
+    }
+
+    AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {
+        return false;
+    }
+
+    AK_FORCE_INLINE float getAdditionalProximityCost() const {
+        return ScoringParams::ADDITIONAL_PROXIMITY_COST;
+    }
+
+    AK_FORCE_INLINE float getSubstitutionCost() const {
+        return ScoringParams::SUBSTITUTION_COST;
+    }
+
+    AK_FORCE_INLINE float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
+            const DicNode *const dicNode) const {
+        const float base = ScoringParams::SPACE_SUBSTITUTION_COST + ScoringParams::COST_NEW_WORD;
+        return base * traverseSession->getMultiWordCostMultiplier();
+    }
+
+    ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
+            const DicTraverseSession *const traverseSession,
+            const DicNode *const parentDicNode, const DicNode *const dicNode) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(TypingWeighting);
+    static const TypingWeighting sInstance;
+
+    TypingWeighting() {}
+    ~TypingWeighting() {}
+};
+} // namespace latinime
+#endif // LATINIME_TYPING_WEIGHTING_H
diff --git a/third_party/android_prediction/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h b/third_party/android_prediction/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
new file mode 100644
index 0000000..e791d70
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
+#define LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
+
+#include "third_party/android_prediction/suggest/policyimpl/utils/edit_distance_policy.h"
+#include "third_party/android_prediction/utils/char_utils.h"
+
+namespace latinime {
+
+// Edit distance policy with unit insertion and deletion costs. Code points are compared
+// after CharUtils::toBaseLowerCase(), and adjacent swapped code points allow a transposition.
+class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy {
+ public:
+    // Stores the given pointers and lengths; the arrays themselves are not copied.
+    DamerauLevenshteinEditDistancePolicy(const int *const string0, const int length0,
+            const int *const string1, const int length1)
+            : mString0(string0), mString0Length(length0), mString1(string1),
+              mString1Length(length1) {}
+    ~DamerauLevenshteinEditDistancePolicy() {}
+
+    AK_FORCE_INLINE float getSubstitutionCost(const int index0, const int index1) const {
+        const bool sameBaseLower = CharUtils::toBaseLowerCase(mString0[index0])
+                == CharUtils::toBaseLowerCase(mString1[index1]);
+        return sameBaseLower ? 0.0f : 1.0f;
+    }
+
+    AK_FORCE_INLINE float getDeletionCost(const int index0, const int index1) const {
+        return 1.0f;
+    }
+
+    AK_FORCE_INLINE float getInsertionCost(const int index0, const int index1) const {
+        return 1.0f;
+    }
+
+    AK_FORCE_INLINE bool allowTransposition(const int index0, const int index1) const {
+        if (index0 <= 0 || index1 <= 0) {
+            return false;  // At least one index has no preceding code point.
+        }
+        const int current0 = CharUtils::toBaseLowerCase(mString0[index0]);
+        const int current1 = CharUtils::toBaseLowerCase(mString1[index1]);
+        return current0 == CharUtils::toBaseLowerCase(mString1[index1 - 1])
+                && current1 == CharUtils::toBaseLowerCase(mString0[index0 - 1]);
+    }
+
+    // A transposition is charged like a substitution at the same indices.
+    AK_FORCE_INLINE float getTranspositionCost(const int index0, const int index1) const {
+        return getSubstitutionCost(index0, index1);
+    }
+
+    AK_FORCE_INLINE int getString0Length() const { return mString0Length; }
+
+    AK_FORCE_INLINE int getString1Length() const { return mString1Length; }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(DamerauLevenshteinEditDistancePolicy);
+
+    const int *const mString0;
+    const int mString0Length;
+    const int *const mString1;
+    const int mString1Length;
+};
+} // namespace latinime
+
+#endif // LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
diff --git a/third_party/android_prediction/suggest/policyimpl/utils/edit_distance.h b/third_party/android_prediction/suggest/policyimpl/utils/edit_distance.h
new file mode 100644
index 0000000..5df81dc
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/utils/edit_distance.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_EDIT_DISTANCE_H
+#define LATINIME_EDIT_DISTANCE_H
+
+#include <algorithm>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/utils/edit_distance_policy.h"
+
+namespace latinime {
+
+class EditDistance {
+ public:
+    // Dynamic programming edit distance; every cost and both lengths come from |policy|.
+    // CAVEAT: There may be performance penalty if you need the edit distance as an integer value.
+    AK_FORCE_INLINE static float getEditDistance(const EditDistancePolicy *const policy) {
+        const int beforeLength = policy->getString0Length();
+        const int afterLength = policy->getString1Length();
+        const int rowWidth = afterLength + 1;
+        float dp[(beforeLength + 1) * rowWidth];  // Row-major table in a runtime-sized array.
+        for (int i = 0; i <= beforeLength; ++i) {
+            dp[rowWidth * i] = i * policy->getInsertionCost(i - 1, -1);
+        }
+        for (int j = 0; j <= afterLength; ++j) {
+            dp[j] = j * policy->getDeletionCost(-1, j - 1);
+        }
+        for (int i = 0; i < beforeLength; ++i) {
+            for (int j = 0; j < afterLength; ++j) {
+                const float ins = dp[rowWidth * i + (j + 1)] + policy->getInsertionCost(i, j);
+                const float del = dp[rowWidth * (i + 1) + j] + policy->getDeletionCost(i, j);
+                const float sub = dp[rowWidth * i + j] + policy->getSubstitutionCost(i, j);
+                float best = std::min(ins, std::min(del, sub));
+                // The transposition source cell (i - 1, j - 1) only exists for positive indices.
+                if (i > 0 && j > 0 && policy->allowTransposition(i, j)) {
+                    best = std::min(best,
+                            dp[rowWidth * (i - 1) + (j - 1)] + policy->getTranspositionCost(i, j));
+                }
+                dp[rowWidth * (i + 1) + (j + 1)] = best;
+            }
+        }
+        if (DEBUG_EDIT_DISTANCE) {
+            AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
+            for (int i = 0; i < beforeLength + 1; ++i) {
+                for (int j = 0; j < afterLength + 1; ++j) {
+                    AKLOGI("EDIT[%d][%d], %f", i, j, dp[rowWidth * i + j]);
+                }
+            }
+        }
+        return dp[(beforeLength + 1) * rowWidth - 1];
+    }
+
+    // When DEBUG_DICT is set, logs the first 11 rows and columns of the table (-1 outside it).
+    AK_FORCE_INLINE static void dumpEditDistance10ForDebug(const float *const editDistanceTable,
+            const int editDistanceTableWidth, const int outputLength) {
+        if (!DEBUG_DICT) {
+            return;
+        }
+        AKLOGI("EditDistanceTable");
+        const int rowWidth = editDistanceTableWidth + 1;
+        for (int row = 0; row <= 10; ++row) {
+            float c[11];
+            for (int col = 0; col <= 10; ++col) {
+                const bool inTable = col < rowWidth && row < outputLength + 1;
+                c[col] = inTable ? editDistanceTable[row * rowWidth + col] : -1.0f;
+            }
+            AKLOGI("[ %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f ]",
+                    c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
+            (void)c; // To suppress compiler warning
+        }
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(EditDistance);
+};
+} // namespace latinime
+
+#endif // LATINIME_EDIT_DISTANCE_H
diff --git a/third_party/android_prediction/suggest/policyimpl/utils/edit_distance_policy.h b/third_party/android_prediction/suggest/policyimpl/utils/edit_distance_policy.h
new file mode 100644
index 0000000..426d622
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/utils/edit_distance_policy.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_EDIT_DISTANCE_POLICY_H
+#define LATINIME_EDIT_DISTANCE_POLICY_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class EditDistancePolicy {  // Cost model interface consumed by EditDistance::getEditDistance().
+ public:
+    virtual float getSubstitutionCost(const int index0, const int index1) const = 0;
+    virtual float getDeletionCost(const int index0, const int index1) const = 0;
+    virtual float getInsertionCost(const int index0, const int index1) const = 0;
+    virtual bool allowTransposition(const int index0, const int index1) const = 0;
+    virtual float getTranspositionCost(const int index0, const int index1) const = 0;
+    virtual int getString0Length() const = 0;  // Used as the "before" length by EditDistance.
+    virtual int getString1Length() const = 0;  // Used as the "after" length by EditDistance.
+
+ protected:  // Construction and destruction are reserved for subclasses.
+    EditDistancePolicy() {}
+    virtual ~EditDistancePolicy() {}
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(EditDistancePolicy);
+};
+} // namespace latinime
+
+#endif // LATINIME_EDIT_DISTANCE_POLICY_H
diff --git a/third_party/android_prediction/utils/autocorrection_threshold_utils.cpp b/third_party/android_prediction/utils/autocorrection_threshold_utils.cpp
new file mode 100644
index 0000000..7d533ee
--- /dev/null
+++ b/third_party/android_prediction/utils/autocorrection_threshold_utils.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/utils/autocorrection_threshold_utils.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "third_party/android_prediction/defines.h"
+#include "third_party/android_prediction/suggest/policyimpl/utils/edit_distance.h"
+#include "third_party/android_prediction/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h"
+
+namespace latinime {
+
+const int AutocorrectionThresholdUtils::MAX_INITIAL_SCORE = 255;  // Top of the [0, 255] range.
+const int AutocorrectionThresholdUtils::TYPED_LETTER_MULTIPLIER = 2;  // Base passed to powf().
+const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2;  // Last factor of maxScore.
+
+/* static */ int AutocorrectionThresholdUtils::editDistance(const int *before,
+        const int beforeLength, const int *after, const int afterLength) {
+    const DamerauLevenshteinEditDistancePolicy damerauLevenshtein(
+            before, beforeLength, after, afterLength);
+    const float distance = EditDistance::getEditDistance(&damerauLevenshtein);
+    return static_cast<int>(distance);  // The cast truncates the float distance.
+}
+
+// In dictionary.cpp, getSuggestion() method,
+// When USE_SUGGEST_INTERFACE_FOR_TYPING is true:
+//
+// // TODO: Revise the following logic thoroughly by referring to the logic
+// // marked as "Otherwise" below.
+// SUGGEST_INTERFACE_OUTPUT_SCALE was multiplied to the original suggestion scores to convert
+// them to integers.
+// score = (int)((original score) * SUGGEST_INTERFACE_OUTPUT_SCALE)
+// Undo the scaling here to recover the original score.
+// normalizedScore = ((float)score) / SUGGEST_INTERFACE_OUTPUT_SCALE
+//
+// Otherwise: suggestion scores are computed using the below formula.
+// original score
+// := powf(mTypedLetterMultiplier (this is defined 2),
+// (the number of matched characters between typed word and suggested word))
+// * (individual word's score which defined in the unigram dictionary,
+// and this score is defined in range [0, 255].)
+// Then, the following processing is applied.
+//  - If the dictionary word is matched up to the point of the user entry
+//    (full match up to min(before.length(), after.length()))
+//    => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined as 1.2)
+//  - If the word is a true full match except for differences in accents or
+//    capitalization, then treat it as if the score was 255.
+//  - If before.length() == after.length()
+//    => multiply by mFullWordMultiplier (this is defined as 2)
+// So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2
+// For historical reasons we ignore the 1.2 modifier (because the measure for a good
+// autocorrection threshold was done at a time when it didn't exist). This doesn't change
+// the result.
+// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
+
+// Normalizes |score| and scales it by a weight that shrinks as the edit distance between
+// |before| and |after| grows. Returns 0.0f for an empty string, an all-space |after|, a
+// non-positive score, or an edit distance of at least afterLength.
+/* static */ float AutocorrectionThresholdUtils::calcNormalizedScore(const int *before,
+        const int beforeLength, const int *after, const int afterLength, const int score) {
+    if (0 == beforeLength || 0 == afterLength) {
+        return 0.0f;
+    }
+    const int distance = editDistance(before, beforeLength, after, afterLength);
+    // Count the spaces in |after|.
+    const int spaceCount =
+            static_cast<int>(std::count(after, after + afterLength, KEYCODE_SPACE));
+    // normalizedScore must be 0.0f (the minimum value) if |after| consists of spaces only, if
+    // the score is less than or equal to 0, or if the edit distance is larger than or equal to
+    // afterLength.
+    if (spaceCount == afterLength || score <= 0 || distance >= afterLength) {
+        return 0.0f;
+    }
+    // add a weight based on edit distance.
+    const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
+    const float floatScore = static_cast<float>(score);
+
+    // TODO: Revise the following logic thoroughly by referring to...
+    if (true /* USE_SUGGEST_INTERFACE_FOR_TYPING */) {
+        return (floatScore / SUGGEST_INTERFACE_OUTPUT_SCALE) * weight;
+    }
+    // ...this logic.
+    float maxScore = static_cast<float>(S_INT_MAX);
+    if (score < S_INT_MAX) {
+        const int matchedLength = std::min(beforeLength, afterLength - spaceCount);
+        maxScore = static_cast<float>(MAX_INITIAL_SCORE)
+                * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
+                        static_cast<float>(matchedLength))
+                * static_cast<float>(FULL_WORD_MULTIPLIER);
+    }
+
+    return (floatScore / maxScore) * weight;
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/utils/autocorrection_threshold_utils.h b/third_party/android_prediction/utils/autocorrection_threshold_utils.h
new file mode 100644
index 0000000..1ddf5c5
--- /dev/null
+++ b/third_party/android_prediction/utils/autocorrection_threshold_utils.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H
+#define LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class AutocorrectionThresholdUtils {  // Every member declared here is static.
+ public:
+    static float calcNormalizedScore(const int *before, const int beforeLength,
+            const int *after, const int afterLength, const int score);
+    static int editDistance(const int *before, const int beforeLength, const int *after,
+            const int afterLength);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(AutocorrectionThresholdUtils);
+
+    static const int MAX_INITIAL_SCORE;  // Value is assigned in the .cpp file.
+    static const int TYPED_LETTER_MULTIPLIER;  // Value is assigned in the .cpp file.
+    static const int FULL_WORD_MULTIPLIER;  // Value is assigned in the .cpp file.
+};
+} // namespace latinime
+#endif // LATINIME_AUTOCORRECTION_THRESHOLD_UTILS_H
diff --git a/third_party/android_prediction/utils/byte_array_view.h b/third_party/android_prediction/utils/byte_array_view.h
new file mode 100644
index 0000000..0bab8d7
--- /dev/null
+++ b/third_party/android_prediction/utils/byte_array_view.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BYTE_ARRAY_VIEW_H
+#define LATINIME_BYTE_ARRAY_VIEW_H
+
+#include <cstdint>
+#include <cstdlib>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+/**
+ * Helper class used to keep track of read accesses for a given memory region.
+ */
+class ReadOnlyByteArrayView {
+ public:
+    // Empty view: null pointer and zero size.
+    ReadOnlyByteArrayView() : mPtr(nullptr), mSize(0) {}
+
+    // View over |size| bytes starting at |ptr|; only the pointer is stored.
+    ReadOnlyByteArrayView(const uint8_t *const ptr, const size_t size)
+            : mPtr(ptr), mSize(size) {}
+
+    // Number of bytes given at construction.
+    AK_FORCE_INLINE size_t size() const { return mSize; }
+
+    // Start of the region given at construction; nullptr for a default-constructed view.
+    AK_FORCE_INLINE const uint8_t *data() const { return mPtr; }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(ReadOnlyByteArrayView);
+
+    const uint8_t *const mPtr;
+    const size_t mSize;
+};
+
+/**
+ * Helper class used to keep track of read-write accesses for a given memory region.
+ */
+class ReadWriteByteArrayView {
+ public:
+    // Empty view: null pointer and zero size.
+    ReadWriteByteArrayView() : mPtr(nullptr), mSize(0) {}
+
+    // View over |size| bytes starting at |ptr|; only the pointer is stored.
+    ReadWriteByteArrayView(uint8_t *const ptr, const size_t size)
+            : mPtr(ptr), mSize(size) {}
+
+    AK_FORCE_INLINE size_t size() const { return mSize; }
+
+    AK_FORCE_INLINE uint8_t *data() const { return mPtr; }
+
+    AK_FORCE_INLINE ReadOnlyByteArrayView getReadOnlyView() const {
+        return ReadOnlyByteArrayView(mPtr, mSize);
+    }
+
+    // View over the |n| bytes that begin |start| bytes into this view.
+    ReadWriteByteArrayView subView(const size_t start, const size_t n) const {
+        // Written without computing start + n so a size_t wrap-around cannot satisfy the check.
+        ASSERT(start <= mSize && n <= mSize - start);
+        return ReadWriteByteArrayView(mPtr + start, n);
+    }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView);
+
+    uint8_t *const mPtr;
+    const size_t mSize;
+};
+
+} // namespace latinime
+#endif // LATINIME_BYTE_ARRAY_VIEW_H
diff --git a/third_party/android_prediction/utils/char_utils.cpp b/third_party/android_prediction/utils/char_utils.cpp
new file mode 100644
index 0000000..0e2532f
--- /dev/null
+++ b/third_party/android_prediction/utils/char_utils.cpp
@@ -0,0 +1,1282 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/utils/char_utils.h"
+
+#include <cstdlib>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+const int CharUtils::MIN_UNICODE_CODE_POINT = 0; // Lower code point bound exposed by CharUtils.
+const int CharUtils::MAX_UNICODE_CODE_POINT = 0x10FFFF; // Upper code point bound exposed by CharUtils.
+
+struct LatinCapitalSmallPair { // One SORTED_CHAR_MAP entry: a capital code point and its lowercase form.
+ unsigned short capital; // Code point of the capital letter (first value of each table entry).
+ unsigned short small; // Code point of the matching small letter (second value of each table entry).
+};
+
+/*
+ * How to update the SORTED_CHAR_MAP[] array.
+ *
+ * 1. Download http://unicode.org/Public/UNIDATA/UnicodeData.txt
+ *
+ * 2. Have the latest version of the ICU4C dev package installed
+ * (Note: the current data has been generated with version 4.8)
+ * $ apt-get install libicu-dev
+ *
+ * 3. Build the following code
+ * $ g++ -o char_utils -I.. -DUPDATING_CHAR_UTILS char_utils.cpp -licuuc
+ */
+#ifdef UPDATING_CHAR_UTILS
+#include <stdio.h>
+#include <unicode/uchar.h> // ICU4C
+
+// Table-maintenance tool, only built with -DUPDATING_CHAR_UTILS.
+// Walks code points 0x80..0xFFFE and looks at those whose ICU4C lowercase form differs from
+// the code point itself. By default each such (code point, lowercase) pair is printed to
+// stdout; with -DCONFIRMING_CHAR_UTILS nothing is printed unless CharUtils::latin_tolower()
+// disagrees with ICU4C, in which case the two lowercase values are reported on stderr.
+extern "C" int main() {
+    // Code points up to 0x7F are skipped, so start right after them.
+    for (unsigned short codePoint = 0x80; codePoint < 0xFFFF; codePoint++) {
+        const unsigned short icuLower = u_tolower(codePoint);
+        const unsigned short ownLower = CharUtils::latin_tolower(codePoint);
+        if (codePoint == icuLower) {
+            // ICU4C leaves this code point unchanged when lowercasing; nothing to emit.
+            continue;
+        }
+#ifdef CONFIRMING_CHAR_UTILS
+        if (icuLower != ownLower) {
+            fprintf(stderr, "icu4cLowerC != myLowerC, 0x%04X, 0x%04X\n", icuLower, ownLower);
+        }
+#else // CONFIRMING_CHAR_UTILS
+        printf("0x%04X, 0x%04X\n", codePoint, icuLower);
+#endif // CONFIRMING_CHAR_UTILS
+    }
+}
+#endif // UPDATING_CHAR_UTILS
+/*
+ * 4. Process the list with UnicodeData.txt
+ * (You need UnicodeData.txt in the current directory)
+ * $ ./char_utils | sort -u | \
+ * perl -e 'open(FH, "UnicodeData.txt"); @buf = <FH>; close(FH); \
+ * while(<>){/0x(\w*), 0x(\w*)/; @lines = grep(/^$1/, @buf); @cols = split(/;/, $lines[0]); \
+ * print " { 0x$1, 0x$cols[13] }, // $cols[1]\n";}'
+ *
+ * 5. Update the SORTED_CHAR_MAP[] array below with the output above.
+ * Then, rebuild with -DCONFIRMING_CHAR_UTILS and confirm the program exits successfully.
+ * $ g++ -o char_utils -I.. -DUPDATING_CHAR_UTILS -DCONFIRMING_CHAR_UTILS char_utils.cpp -licuuc
+ * $ ./char_utils
+ * $
+ */
+static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
+ { 0x00C0, 0x00E0 }, // LATIN CAPITAL LETTER A WITH GRAVE
+ { 0x00C1, 0x00E1 }, // LATIN CAPITAL LETTER A WITH ACUTE
+ { 0x00C2, 0x00E2 }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ { 0x00C3, 0x00E3 }, // LATIN CAPITAL LETTER A WITH TILDE
+ { 0x00C4, 0x00E4 }, // LATIN CAPITAL LETTER A WITH DIAERESIS
+ { 0x00C5, 0x00E5 }, // LATIN CAPITAL LETTER A WITH RING ABOVE
+ { 0x00C6, 0x00E6 }, // LATIN CAPITAL LETTER AE
+ { 0x00C7, 0x00E7 }, // LATIN CAPITAL LETTER C WITH CEDILLA
+ { 0x00C8, 0x00E8 }, // LATIN CAPITAL LETTER E WITH GRAVE
+ { 0x00C9, 0x00E9 }, // LATIN CAPITAL LETTER E WITH ACUTE
+ { 0x00CA, 0x00EA }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ { 0x00CB, 0x00EB }, // LATIN CAPITAL LETTER E WITH DIAERESIS
+ { 0x00CC, 0x00EC }, // LATIN CAPITAL LETTER I WITH GRAVE
+ { 0x00CD, 0x00ED }, // LATIN CAPITAL LETTER I WITH ACUTE
+ { 0x00CE, 0x00EE }, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ { 0x00CF, 0x00EF }, // LATIN CAPITAL LETTER I WITH DIAERESIS
+ { 0x00D0, 0x00F0 }, // LATIN CAPITAL LETTER ETH
+ { 0x00D1, 0x00F1 }, // LATIN CAPITAL LETTER N WITH TILDE
+ { 0x00D2, 0x00F2 }, // LATIN CAPITAL LETTER O WITH GRAVE
+ { 0x00D3, 0x00F3 }, // LATIN CAPITAL LETTER O WITH ACUTE
+ { 0x00D4, 0x00F4 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ { 0x00D5, 0x00F5 }, // LATIN CAPITAL LETTER O WITH TILDE
+ { 0x00D6, 0x00F6 }, // LATIN CAPITAL LETTER O WITH DIAERESIS
+ { 0x00D8, 0x00F8 }, // LATIN CAPITAL LETTER O WITH STROKE
+ { 0x00D9, 0x00F9 }, // LATIN CAPITAL LETTER U WITH GRAVE
+ { 0x00DA, 0x00FA }, // LATIN CAPITAL LETTER U WITH ACUTE
+ { 0x00DB, 0x00FB }, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ { 0x00DC, 0x00FC }, // LATIN CAPITAL LETTER U WITH DIAERESIS
+ { 0x00DD, 0x00FD }, // LATIN CAPITAL LETTER Y WITH ACUTE
+ { 0x00DE, 0x00FE }, // LATIN CAPITAL LETTER THORN
+ { 0x0100, 0x0101 }, // LATIN CAPITAL LETTER A WITH MACRON
+ { 0x0102, 0x0103 }, // LATIN CAPITAL LETTER A WITH BREVE
+ { 0x0104, 0x0105 }, // LATIN CAPITAL LETTER A WITH OGONEK
+ { 0x0106, 0x0107 }, // LATIN CAPITAL LETTER C WITH ACUTE
+ { 0x0108, 0x0109 }, // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+ { 0x010A, 0x010B }, // LATIN CAPITAL LETTER C WITH DOT ABOVE
+ { 0x010C, 0x010D }, // LATIN CAPITAL LETTER C WITH CARON
+ { 0x010E, 0x010F }, // LATIN CAPITAL LETTER D WITH CARON
+ { 0x0110, 0x0111 }, // LATIN CAPITAL LETTER D WITH STROKE
+ { 0x0112, 0x0113 }, // LATIN CAPITAL LETTER E WITH MACRON
+ { 0x0114, 0x0115 }, // LATIN CAPITAL LETTER E WITH BREVE
+ { 0x0116, 0x0117 }, // LATIN CAPITAL LETTER E WITH DOT ABOVE
+ { 0x0118, 0x0119 }, // LATIN CAPITAL LETTER E WITH OGONEK
+ { 0x011A, 0x011B }, // LATIN CAPITAL LETTER E WITH CARON
+ { 0x011C, 0x011D }, // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+ { 0x011E, 0x011F }, // LATIN CAPITAL LETTER G WITH BREVE
+ { 0x0120, 0x0121 }, // LATIN CAPITAL LETTER G WITH DOT ABOVE
+ { 0x0122, 0x0123 }, // LATIN CAPITAL LETTER G WITH CEDILLA
+ { 0x0124, 0x0125 }, // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+ { 0x0126, 0x0127 }, // LATIN CAPITAL LETTER H WITH STROKE
+ { 0x0128, 0x0129 }, // LATIN CAPITAL LETTER I WITH TILDE
+ { 0x012A, 0x012B }, // LATIN CAPITAL LETTER I WITH MACRON
+ { 0x012C, 0x012D }, // LATIN CAPITAL LETTER I WITH BREVE
+ { 0x012E, 0x012F }, // LATIN CAPITAL LETTER I WITH OGONEK
+ { 0x0130, 0x0069 }, // LATIN CAPITAL LETTER I WITH DOT ABOVE
+ { 0x0132, 0x0133 }, // LATIN CAPITAL LIGATURE IJ
+ { 0x0134, 0x0135 }, // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+ { 0x0136, 0x0137 }, // LATIN CAPITAL LETTER K WITH CEDILLA
+ { 0x0139, 0x013A }, // LATIN CAPITAL LETTER L WITH ACUTE
+ { 0x013B, 0x013C }, // LATIN CAPITAL LETTER L WITH CEDILLA
+ { 0x013D, 0x013E }, // LATIN CAPITAL LETTER L WITH CARON
+ { 0x013F, 0x0140 }, // LATIN CAPITAL LETTER L WITH MIDDLE DOT
+ { 0x0141, 0x0142 }, // LATIN CAPITAL LETTER L WITH STROKE
+ { 0x0143, 0x0144 }, // LATIN CAPITAL LETTER N WITH ACUTE
+ { 0x0145, 0x0146 }, // LATIN CAPITAL LETTER N WITH CEDILLA
+ { 0x0147, 0x0148 }, // LATIN CAPITAL LETTER N WITH CARON
+ { 0x014A, 0x014B }, // LATIN CAPITAL LETTER ENG
+ { 0x014C, 0x014D }, // LATIN CAPITAL LETTER O WITH MACRON
+ { 0x014E, 0x014F }, // LATIN CAPITAL LETTER O WITH BREVE
+ { 0x0150, 0x0151 }, // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+ { 0x0152, 0x0153 }, // LATIN CAPITAL LIGATURE OE
+ { 0x0154, 0x0155 }, // LATIN CAPITAL LETTER R WITH ACUTE
+ { 0x0156, 0x0157 }, // LATIN CAPITAL LETTER R WITH CEDILLA
+ { 0x0158, 0x0159 }, // LATIN CAPITAL LETTER R WITH CARON
+ { 0x015A, 0x015B }, // LATIN CAPITAL LETTER S WITH ACUTE
+ { 0x015C, 0x015D }, // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+ { 0x015E, 0x015F }, // LATIN CAPITAL LETTER S WITH CEDILLA
+ { 0x0160, 0x0161 }, // LATIN CAPITAL LETTER S WITH CARON
+ { 0x0162, 0x0163 }, // LATIN CAPITAL LETTER T WITH CEDILLA
+ { 0x0164, 0x0165 }, // LATIN CAPITAL LETTER T WITH CARON
+ { 0x0166, 0x0167 }, // LATIN CAPITAL LETTER T WITH STROKE
+ { 0x0168, 0x0169 }, // LATIN CAPITAL LETTER U WITH TILDE
+ { 0x016A, 0x016B }, // LATIN CAPITAL LETTER U WITH MACRON
+ { 0x016C, 0x016D }, // LATIN CAPITAL LETTER U WITH BREVE
+ { 0x016E, 0x016F }, // LATIN CAPITAL LETTER U WITH RING ABOVE
+ { 0x0170, 0x0171 }, // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+ { 0x0172, 0x0173 }, // LATIN CAPITAL LETTER U WITH OGONEK
+ { 0x0174, 0x0175 }, // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+ { 0x0176, 0x0177 }, // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+ { 0x0178, 0x00FF }, // LATIN CAPITAL LETTER Y WITH DIAERESIS
+ { 0x0179, 0x017A }, // LATIN CAPITAL LETTER Z WITH ACUTE
+ { 0x017B, 0x017C }, // LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ { 0x017D, 0x017E }, // LATIN CAPITAL LETTER Z WITH CARON
+ { 0x0181, 0x0253 }, // LATIN CAPITAL LETTER B WITH HOOK
+ { 0x0182, 0x0183 }, // LATIN CAPITAL LETTER B WITH TOPBAR
+ { 0x0184, 0x0185 }, // LATIN CAPITAL LETTER TONE SIX
+ { 0x0186, 0x0254 }, // LATIN CAPITAL LETTER OPEN O
+ { 0x0187, 0x0188 }, // LATIN CAPITAL LETTER C WITH HOOK
+ { 0x0189, 0x0256 }, // LATIN CAPITAL LETTER AFRICAN D
+ { 0x018A, 0x0257 }, // LATIN CAPITAL LETTER D WITH HOOK
+ { 0x018B, 0x018C }, // LATIN CAPITAL LETTER D WITH TOPBAR
+ { 0x018E, 0x01DD }, // LATIN CAPITAL LETTER REVERSED E
+ { 0x018F, 0x0259 }, // LATIN CAPITAL LETTER SCHWA
+ { 0x0190, 0x025B }, // LATIN CAPITAL LETTER OPEN E
+ { 0x0191, 0x0192 }, // LATIN CAPITAL LETTER F WITH HOOK
+ { 0x0193, 0x0260 }, // LATIN CAPITAL LETTER G WITH HOOK
+ { 0x0194, 0x0263 }, // LATIN CAPITAL LETTER GAMMA
+ { 0x0196, 0x0269 }, // LATIN CAPITAL LETTER IOTA
+ { 0x0197, 0x0268 }, // LATIN CAPITAL LETTER I WITH STROKE
+ { 0x0198, 0x0199 }, // LATIN CAPITAL LETTER K WITH HOOK
+ { 0x019C, 0x026F }, // LATIN CAPITAL LETTER TURNED M
+ { 0x019D, 0x0272 }, // LATIN CAPITAL LETTER N WITH LEFT HOOK
+ { 0x019F, 0x0275 }, // LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+ { 0x01A0, 0x01A1 }, // LATIN CAPITAL LETTER O WITH HORN
+ { 0x01A2, 0x01A3 }, // LATIN CAPITAL LETTER OI
+ { 0x01A4, 0x01A5 }, // LATIN CAPITAL LETTER P WITH HOOK
+ { 0x01A6, 0x0280 }, // LATIN LETTER YR
+ { 0x01A7, 0x01A8 }, // LATIN CAPITAL LETTER TONE TWO
+ { 0x01A9, 0x0283 }, // LATIN CAPITAL LETTER ESH
+ { 0x01AC, 0x01AD }, // LATIN CAPITAL LETTER T WITH HOOK
+ { 0x01AE, 0x0288 }, // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+ { 0x01AF, 0x01B0 }, // LATIN CAPITAL LETTER U WITH HORN
+ { 0x01B1, 0x028A }, // LATIN CAPITAL LETTER UPSILON
+ { 0x01B2, 0x028B }, // LATIN CAPITAL LETTER V WITH HOOK
+ { 0x01B3, 0x01B4 }, // LATIN CAPITAL LETTER Y WITH HOOK
+ { 0x01B5, 0x01B6 }, // LATIN CAPITAL LETTER Z WITH STROKE
+ { 0x01B7, 0x0292 }, // LATIN CAPITAL LETTER EZH
+ { 0x01B8, 0x01B9 }, // LATIN CAPITAL LETTER EZH REVERSED
+ { 0x01BC, 0x01BD }, // LATIN CAPITAL LETTER TONE FIVE
+ { 0x01C4, 0x01C6 }, // LATIN CAPITAL LETTER DZ WITH CARON
+ { 0x01C5, 0x01C6 }, // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+ { 0x01C7, 0x01C9 }, // LATIN CAPITAL LETTER LJ
+ { 0x01C8, 0x01C9 }, // LATIN CAPITAL LETTER L WITH SMALL LETTER J
+ { 0x01CA, 0x01CC }, // LATIN CAPITAL LETTER NJ
+ { 0x01CB, 0x01CC }, // LATIN CAPITAL LETTER N WITH SMALL LETTER J
+ { 0x01CD, 0x01CE }, // LATIN CAPITAL LETTER A WITH CARON
+ { 0x01CF, 0x01D0 }, // LATIN CAPITAL LETTER I WITH CARON
+ { 0x01D1, 0x01D2 }, // LATIN CAPITAL LETTER O WITH CARON
+ { 0x01D3, 0x01D4 }, // LATIN CAPITAL LETTER U WITH CARON
+ { 0x01D5, 0x01D6 }, // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+ { 0x01D7, 0x01D8 }, // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+ { 0x01D9, 0x01DA }, // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+ { 0x01DB, 0x01DC }, // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+ { 0x01DE, 0x01DF }, // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+ { 0x01E0, 0x01E1 }, // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+ { 0x01E2, 0x01E3 }, // LATIN CAPITAL LETTER AE WITH MACRON
+ { 0x01E4, 0x01E5 }, // LATIN CAPITAL LETTER G WITH STROKE
+ { 0x01E6, 0x01E7 }, // LATIN CAPITAL LETTER G WITH CARON
+ { 0x01E8, 0x01E9 }, // LATIN CAPITAL LETTER K WITH CARON
+ { 0x01EA, 0x01EB }, // LATIN CAPITAL LETTER O WITH OGONEK
+ { 0x01EC, 0x01ED }, // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+ { 0x01EE, 0x01EF }, // LATIN CAPITAL LETTER EZH WITH CARON
+ { 0x01F1, 0x01F3 }, // LATIN CAPITAL LETTER DZ
+ { 0x01F2, 0x01F3 }, // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+ { 0x01F4, 0x01F5 }, // LATIN CAPITAL LETTER G WITH ACUTE
+ { 0x01F6, 0x0195 }, // LATIN CAPITAL LETTER HWAIR
+ { 0x01F7, 0x01BF }, // LATIN CAPITAL LETTER WYNN
+ { 0x01F8, 0x01F9 }, // LATIN CAPITAL LETTER N WITH GRAVE
+ { 0x01FA, 0x01FB }, // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+ { 0x01FC, 0x01FD }, // LATIN CAPITAL LETTER AE WITH ACUTE
+ { 0x01FE, 0x01FF }, // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+ { 0x0200, 0x0201 }, // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+ { 0x0202, 0x0203 }, // LATIN CAPITAL LETTER A WITH INVERTED BREVE
+ { 0x0204, 0x0205 }, // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+ { 0x0206, 0x0207 }, // LATIN CAPITAL LETTER E WITH INVERTED BREVE
+ { 0x0208, 0x0209 }, // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+ { 0x020A, 0x020B }, // LATIN CAPITAL LETTER I WITH INVERTED BREVE
+ { 0x020C, 0x020D }, // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+ { 0x020E, 0x020F }, // LATIN CAPITAL LETTER O WITH INVERTED BREVE
+ { 0x0210, 0x0211 }, // LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+ { 0x0212, 0x0213 }, // LATIN CAPITAL LETTER R WITH INVERTED BREVE
+ { 0x0214, 0x0215 }, // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+ { 0x0216, 0x0217 }, // LATIN CAPITAL LETTER U WITH INVERTED BREVE
+ { 0x0218, 0x0219 }, // LATIN CAPITAL LETTER S WITH COMMA BELOW
+ { 0x021A, 0x021B }, // LATIN CAPITAL LETTER T WITH COMMA BELOW
+ { 0x021C, 0x021D }, // LATIN CAPITAL LETTER YOGH
+ { 0x021E, 0x021F }, // LATIN CAPITAL LETTER H WITH CARON
+ { 0x0220, 0x019E }, // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+ { 0x0222, 0x0223 }, // LATIN CAPITAL LETTER OU
+ { 0x0224, 0x0225 }, // LATIN CAPITAL LETTER Z WITH HOOK
+ { 0x0226, 0x0227 }, // LATIN CAPITAL LETTER A WITH DOT ABOVE
+ { 0x0228, 0x0229 }, // LATIN CAPITAL LETTER E WITH CEDILLA
+ { 0x022A, 0x022B }, // LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+ { 0x022C, 0x022D }, // LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+ { 0x022E, 0x022F }, // LATIN CAPITAL LETTER O WITH DOT ABOVE
+ { 0x0230, 0x0231 }, // LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+ { 0x0232, 0x0233 }, // LATIN CAPITAL LETTER Y WITH MACRON
+ { 0x023A, 0x2C65 }, // LATIN CAPITAL LETTER A WITH STROKE
+ { 0x023B, 0x023C }, // LATIN CAPITAL LETTER C WITH STROKE
+ { 0x023D, 0x019A }, // LATIN CAPITAL LETTER L WITH BAR
+ { 0x023E, 0x2C66 }, // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
+ { 0x0241, 0x0242 }, // LATIN CAPITAL LETTER GLOTTAL STOP
+ { 0x0243, 0x0180 }, // LATIN CAPITAL LETTER B WITH STROKE
+ { 0x0244, 0x0289 }, // LATIN CAPITAL LETTER U BAR
+ { 0x0245, 0x028C }, // LATIN CAPITAL LETTER TURNED V
+ { 0x0246, 0x0247 }, // LATIN CAPITAL LETTER E WITH STROKE
+ { 0x0248, 0x0249 }, // LATIN CAPITAL LETTER J WITH STROKE
+ { 0x024A, 0x024B }, // LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
+ { 0x024C, 0x024D }, // LATIN CAPITAL LETTER R WITH STROKE
+ { 0x024E, 0x024F }, // LATIN CAPITAL LETTER Y WITH STROKE
+ { 0x0370, 0x0371 }, // GREEK CAPITAL LETTER HETA
+ { 0x0372, 0x0373 }, // GREEK CAPITAL LETTER ARCHAIC SAMPI
+ { 0x0376, 0x0377 }, // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
+ { 0x0386, 0x03AC }, // GREEK CAPITAL LETTER ALPHA WITH TONOS
+ { 0x0388, 0x03AD }, // GREEK CAPITAL LETTER EPSILON WITH TONOS
+ { 0x0389, 0x03AE }, // GREEK CAPITAL LETTER ETA WITH TONOS
+ { 0x038A, 0x03AF }, // GREEK CAPITAL LETTER IOTA WITH TONOS
+ { 0x038C, 0x03CC }, // GREEK CAPITAL LETTER OMICRON WITH TONOS
+ { 0x038E, 0x03CD }, // GREEK CAPITAL LETTER UPSILON WITH TONOS
+ { 0x038F, 0x03CE }, // GREEK CAPITAL LETTER OMEGA WITH TONOS
+ { 0x0391, 0x03B1 }, // GREEK CAPITAL LETTER ALPHA
+ { 0x0392, 0x03B2 }, // GREEK CAPITAL LETTER BETA
+ { 0x0393, 0x03B3 }, // GREEK CAPITAL LETTER GAMMA
+ { 0x0394, 0x03B4 }, // GREEK CAPITAL LETTER DELTA
+ { 0x0395, 0x03B5 }, // GREEK CAPITAL LETTER EPSILON
+ { 0x0396, 0x03B6 }, // GREEK CAPITAL LETTER ZETA
+ { 0x0397, 0x03B7 }, // GREEK CAPITAL LETTER ETA
+ { 0x0398, 0x03B8 }, // GREEK CAPITAL LETTER THETA
+ { 0x0399, 0x03B9 }, // GREEK CAPITAL LETTER IOTA
+ { 0x039A, 0x03BA }, // GREEK CAPITAL LETTER KAPPA
+ { 0x039B, 0x03BB }, // GREEK CAPITAL LETTER LAMDA
+ { 0x039C, 0x03BC }, // GREEK CAPITAL LETTER MU
+ { 0x039D, 0x03BD }, // GREEK CAPITAL LETTER NU
+ { 0x039E, 0x03BE }, // GREEK CAPITAL LETTER XI
+ { 0x039F, 0x03BF }, // GREEK CAPITAL LETTER OMICRON
+ { 0x03A0, 0x03C0 }, // GREEK CAPITAL LETTER PI
+ { 0x03A1, 0x03C1 }, // GREEK CAPITAL LETTER RHO
+ { 0x03A3, 0x03C3 }, // GREEK CAPITAL LETTER SIGMA
+ { 0x03A4, 0x03C4 }, // GREEK CAPITAL LETTER TAU
+ { 0x03A5, 0x03C5 }, // GREEK CAPITAL LETTER UPSILON
+ { 0x03A6, 0x03C6 }, // GREEK CAPITAL LETTER PHI
+ { 0x03A7, 0x03C7 }, // GREEK CAPITAL LETTER CHI
+ { 0x03A8, 0x03C8 }, // GREEK CAPITAL LETTER PSI
+ { 0x03A9, 0x03C9 }, // GREEK CAPITAL LETTER OMEGA
+ { 0x03AA, 0x03CA }, // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ { 0x03AB, 0x03CB }, // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ { 0x03CF, 0x03D7 }, // GREEK CAPITAL KAI SYMBOL
+ { 0x03D8, 0x03D9 }, // GREEK LETTER ARCHAIC KOPPA
+ { 0x03DA, 0x03DB }, // GREEK LETTER STIGMA
+ { 0x03DC, 0x03DD }, // GREEK LETTER DIGAMMA
+ { 0x03DE, 0x03DF }, // GREEK LETTER KOPPA
+ { 0x03E0, 0x03E1 }, // GREEK LETTER SAMPI
+ { 0x03E2, 0x03E3 }, // COPTIC CAPITAL LETTER SHEI
+ { 0x03E4, 0x03E5 }, // COPTIC CAPITAL LETTER FEI
+ { 0x03E6, 0x03E7 }, // COPTIC CAPITAL LETTER KHEI
+ { 0x03E8, 0x03E9 }, // COPTIC CAPITAL LETTER HORI
+ { 0x03EA, 0x03EB }, // COPTIC CAPITAL LETTER GANGIA
+ { 0x03EC, 0x03ED }, // COPTIC CAPITAL LETTER SHIMA
+ { 0x03EE, 0x03EF }, // COPTIC CAPITAL LETTER DEI
+ { 0x03F4, 0x03B8 }, // GREEK CAPITAL THETA SYMBOL
+ { 0x03F7, 0x03F8 }, // GREEK CAPITAL LETTER SHO
+ { 0x03F9, 0x03F2 }, // GREEK CAPITAL LUNATE SIGMA SYMBOL
+ { 0x03FA, 0x03FB }, // GREEK CAPITAL LETTER SAN
+ { 0x03FD, 0x037B }, // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
+ { 0x03FE, 0x037C }, // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
+ { 0x03FF, 0x037D }, // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+ { 0x0400, 0x0450 }, // CYRILLIC CAPITAL LETTER IE WITH GRAVE
+ { 0x0401, 0x0451 }, // CYRILLIC CAPITAL LETTER IO
+ { 0x0402, 0x0452 }, // CYRILLIC CAPITAL LETTER DJE
+ { 0x0403, 0x0453 }, // CYRILLIC CAPITAL LETTER GJE
+ { 0x0404, 0x0454 }, // CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ { 0x0405, 0x0455 }, // CYRILLIC CAPITAL LETTER DZE
+ { 0x0406, 0x0456 }, // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ { 0x0407, 0x0457 }, // CYRILLIC CAPITAL LETTER YI
+ { 0x0408, 0x0458 }, // CYRILLIC CAPITAL LETTER JE
+ { 0x0409, 0x0459 }, // CYRILLIC CAPITAL LETTER LJE
+ { 0x040A, 0x045A }, // CYRILLIC CAPITAL LETTER NJE
+ { 0x040B, 0x045B }, // CYRILLIC CAPITAL LETTER TSHE
+ { 0x040C, 0x045C }, // CYRILLIC CAPITAL LETTER KJE
+ { 0x040D, 0x045D }, // CYRILLIC CAPITAL LETTER I WITH GRAVE
+ { 0x040E, 0x045E }, // CYRILLIC CAPITAL LETTER SHORT U
+ { 0x040F, 0x045F }, // CYRILLIC CAPITAL LETTER DZHE
+ { 0x0410, 0x0430 }, // CYRILLIC CAPITAL LETTER A
+ { 0x0411, 0x0431 }, // CYRILLIC CAPITAL LETTER BE
+ { 0x0412, 0x0432 }, // CYRILLIC CAPITAL LETTER VE
+ { 0x0413, 0x0433 }, // CYRILLIC CAPITAL LETTER GHE
+ { 0x0414, 0x0434 }, // CYRILLIC CAPITAL LETTER DE
+ { 0x0415, 0x0435 }, // CYRILLIC CAPITAL LETTER IE
+ { 0x0416, 0x0436 }, // CYRILLIC CAPITAL LETTER ZHE
+ { 0x0417, 0x0437 }, // CYRILLIC CAPITAL LETTER ZE
+ { 0x0418, 0x0438 }, // CYRILLIC CAPITAL LETTER I
+ { 0x0419, 0x0439 }, // CYRILLIC CAPITAL LETTER SHORT I
+ { 0x041A, 0x043A }, // CYRILLIC CAPITAL LETTER KA
+ { 0x041B, 0x043B }, // CYRILLIC CAPITAL LETTER EL
+ { 0x041C, 0x043C }, // CYRILLIC CAPITAL LETTER EM
+ { 0x041D, 0x043D }, // CYRILLIC CAPITAL LETTER EN
+ { 0x041E, 0x043E }, // CYRILLIC CAPITAL LETTER O
+ { 0x041F, 0x043F }, // CYRILLIC CAPITAL LETTER PE
+ { 0x0420, 0x0440 }, // CYRILLIC CAPITAL LETTER ER
+ { 0x0421, 0x0441 }, // CYRILLIC CAPITAL LETTER ES
+ { 0x0422, 0x0442 }, // CYRILLIC CAPITAL LETTER TE
+ { 0x0423, 0x0443 }, // CYRILLIC CAPITAL LETTER U
+ { 0x0424, 0x0444 }, // CYRILLIC CAPITAL LETTER EF
+ { 0x0425, 0x0445 }, // CYRILLIC CAPITAL LETTER HA
+ { 0x0426, 0x0446 }, // CYRILLIC CAPITAL LETTER TSE
+ { 0x0427, 0x0447 }, // CYRILLIC CAPITAL LETTER CHE
+ { 0x0428, 0x0448 }, // CYRILLIC CAPITAL LETTER SHA
+ { 0x0429, 0x0449 }, // CYRILLIC CAPITAL LETTER SHCHA
+ { 0x042A, 0x044A }, // CYRILLIC CAPITAL LETTER HARD SIGN
+ { 0x042B, 0x044B }, // CYRILLIC CAPITAL LETTER YERU
+ { 0x042C, 0x044C }, // CYRILLIC CAPITAL LETTER SOFT SIGN
+ { 0x042D, 0x044D }, // CYRILLIC CAPITAL LETTER E
+ { 0x042E, 0x044E }, // CYRILLIC CAPITAL LETTER YU
+ { 0x042F, 0x044F }, // CYRILLIC CAPITAL LETTER YA
+ { 0x0460, 0x0461 }, // CYRILLIC CAPITAL LETTER OMEGA
+ { 0x0462, 0x0463 }, // CYRILLIC CAPITAL LETTER YAT
+ { 0x0464, 0x0465 }, // CYRILLIC CAPITAL LETTER IOTIFIED E
+ { 0x0466, 0x0467 }, // CYRILLIC CAPITAL LETTER LITTLE YUS
+ { 0x0468, 0x0469 }, // CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+ { 0x046A, 0x046B }, // CYRILLIC CAPITAL LETTER BIG YUS
+ { 0x046C, 0x046D }, // CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+ { 0x046E, 0x046F }, // CYRILLIC CAPITAL LETTER KSI
+ { 0x0470, 0x0471 }, // CYRILLIC CAPITAL LETTER PSI
+ { 0x0472, 0x0473 }, // CYRILLIC CAPITAL LETTER FITA
+ { 0x0474, 0x0475 }, // CYRILLIC CAPITAL LETTER IZHITSA
+ { 0x0476, 0x0477 }, // CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+ { 0x0478, 0x0479 }, // CYRILLIC CAPITAL LETTER UK
+ { 0x047A, 0x047B }, // CYRILLIC CAPITAL LETTER ROUND OMEGA
+ { 0x047C, 0x047D }, // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+ { 0x047E, 0x047F }, // CYRILLIC CAPITAL LETTER OT
+ { 0x0480, 0x0481 }, // CYRILLIC CAPITAL LETTER KOPPA
+ { 0x048A, 0x048B }, // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
+ { 0x048C, 0x048D }, // CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+ { 0x048E, 0x048F }, // CYRILLIC CAPITAL LETTER ER WITH TICK
+ { 0x0490, 0x0491 }, // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+ { 0x0492, 0x0493 }, // CYRILLIC CAPITAL LETTER GHE WITH STROKE
+ { 0x0494, 0x0495 }, // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+ { 0x0496, 0x0497 }, // CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+ { 0x0498, 0x0499 }, // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+ { 0x049A, 0x049B }, // CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+ { 0x049C, 0x049D }, // CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+ { 0x049E, 0x049F }, // CYRILLIC CAPITAL LETTER KA WITH STROKE
+ { 0x04A0, 0x04A1 }, // CYRILLIC CAPITAL LETTER BASHKIR KA
+ { 0x04A2, 0x04A3 }, // CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+ { 0x04A4, 0x04A5 }, // CYRILLIC CAPITAL LIGATURE EN GHE
+ { 0x04A6, 0x04A7 }, // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+ { 0x04A8, 0x04A9 }, // CYRILLIC CAPITAL LETTER ABKHASIAN HA
+ { 0x04AA, 0x04AB }, // CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+ { 0x04AC, 0x04AD }, // CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+ { 0x04AE, 0x04AF }, // CYRILLIC CAPITAL LETTER STRAIGHT U
+ { 0x04B0, 0x04B1 }, // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+ { 0x04B2, 0x04B3 }, // CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+ { 0x04B4, 0x04B5 }, // CYRILLIC CAPITAL LIGATURE TE TSE
+ { 0x04B6, 0x04B7 }, // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+ { 0x04B8, 0x04B9 }, // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+ { 0x04BA, 0x04BB }, // CYRILLIC CAPITAL LETTER SHHA
+ { 0x04BC, 0x04BD }, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+ { 0x04BE, 0x04BF }, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+ { 0x04C0, 0x04CF }, // CYRILLIC LETTER PALOCHKA
+ { 0x04C1, 0x04C2 }, // CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+ { 0x04C3, 0x04C4 }, // CYRILLIC CAPITAL LETTER KA WITH HOOK
+ { 0x04C5, 0x04C6 }, // CYRILLIC CAPITAL LETTER EL WITH TAIL
+ { 0x04C7, 0x04C8 }, // CYRILLIC CAPITAL LETTER EN WITH HOOK
+ { 0x04C9, 0x04CA }, // CYRILLIC CAPITAL LETTER EN WITH TAIL
+ { 0x04CB, 0x04CC }, // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+ { 0x04CD, 0x04CE }, // CYRILLIC CAPITAL LETTER EM WITH TAIL
+ { 0x04D0, 0x04D1 }, // CYRILLIC CAPITAL LETTER A WITH BREVE
+ { 0x04D2, 0x04D3 }, // CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+ { 0x04D4, 0x04D5 }, // CYRILLIC CAPITAL LIGATURE A IE
+ { 0x04D6, 0x04D7 }, // CYRILLIC CAPITAL LETTER IE WITH BREVE
+ { 0x04D8, 0x04D9 }, // CYRILLIC CAPITAL LETTER SCHWA
+ { 0x04DA, 0x04DB }, // CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+ { 0x04DC, 0x04DD }, // CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+ { 0x04DE, 0x04DF }, // CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+ { 0x04E0, 0x04E1 }, // CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+ { 0x04E2, 0x04E3 }, // CYRILLIC CAPITAL LETTER I WITH MACRON
+ { 0x04E4, 0x04E5 }, // CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+ { 0x04E6, 0x04E7 }, // CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+ { 0x04E8, 0x04E9 }, // CYRILLIC CAPITAL LETTER BARRED O
+ { 0x04EA, 0x04EB }, // CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+ { 0x04EC, 0x04ED }, // CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+ { 0x04EE, 0x04EF }, // CYRILLIC CAPITAL LETTER U WITH MACRON
+ { 0x04F0, 0x04F1 }, // CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+ { 0x04F2, 0x04F3 }, // CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+ { 0x04F4, 0x04F5 }, // CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+ { 0x04F6, 0x04F7 }, // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
+ { 0x04F8, 0x04F9 }, // CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+ { 0x04FA, 0x04FB }, // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
+ { 0x04FC, 0x04FD }, // CYRILLIC CAPITAL LETTER HA WITH HOOK
+ { 0x04FE, 0x04FF }, // CYRILLIC CAPITAL LETTER HA WITH STROKE
+ { 0x0500, 0x0501 }, // CYRILLIC CAPITAL LETTER KOMI DE
+ { 0x0502, 0x0503 }, // CYRILLIC CAPITAL LETTER KOMI DJE
+ { 0x0504, 0x0505 }, // CYRILLIC CAPITAL LETTER KOMI ZJE
+ { 0x0506, 0x0507 }, // CYRILLIC CAPITAL LETTER KOMI DZJE
+ { 0x0508, 0x0509 }, // CYRILLIC CAPITAL LETTER KOMI LJE
+ { 0x050A, 0x050B }, // CYRILLIC CAPITAL LETTER KOMI NJE
+ { 0x050C, 0x050D }, // CYRILLIC CAPITAL LETTER KOMI SJE
+ { 0x050E, 0x050F }, // CYRILLIC CAPITAL LETTER KOMI TJE
+ { 0x0510, 0x0511 }, // CYRILLIC CAPITAL LETTER REVERSED ZE
+ { 0x0512, 0x0513 }, // CYRILLIC CAPITAL LETTER EL WITH HOOK
+ { 0x0514, 0x0515 }, // CYRILLIC CAPITAL LETTER LHA
+ { 0x0516, 0x0517 }, // CYRILLIC CAPITAL LETTER RHA
+ { 0x0518, 0x0519 }, // CYRILLIC CAPITAL LETTER YAE
+ { 0x051A, 0x051B }, // CYRILLIC CAPITAL LETTER QA
+ { 0x051C, 0x051D }, // CYRILLIC CAPITAL LETTER WE
+ { 0x051E, 0x051F }, // CYRILLIC CAPITAL LETTER ALEUT KA
+ { 0x0520, 0x0521 }, // CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
+ { 0x0522, 0x0523 }, // CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
+ { 0x0524, 0x0525 }, // CYRILLIC CAPITAL LETTER PE WITH DESCENDER
+ { 0x0526, 0x0527 }, // CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
+ { 0x0531, 0x0561 }, // ARMENIAN CAPITAL LETTER AYB
+ { 0x0532, 0x0562 }, // ARMENIAN CAPITAL LETTER BEN
+ { 0x0533, 0x0563 }, // ARMENIAN CAPITAL LETTER GIM
+ { 0x0534, 0x0564 }, // ARMENIAN CAPITAL LETTER DA
+ { 0x0535, 0x0565 }, // ARMENIAN CAPITAL LETTER ECH
+ { 0x0536, 0x0566 }, // ARMENIAN CAPITAL LETTER ZA
+ { 0x0537, 0x0567 }, // ARMENIAN CAPITAL LETTER EH
+ { 0x0538, 0x0568 }, // ARMENIAN CAPITAL LETTER ET
+ { 0x0539, 0x0569 }, // ARMENIAN CAPITAL LETTER TO
+ { 0x053A, 0x056A }, // ARMENIAN CAPITAL LETTER ZHE
+ { 0x053B, 0x056B }, // ARMENIAN CAPITAL LETTER INI
+ { 0x053C, 0x056C }, // ARMENIAN CAPITAL LETTER LIWN
+ { 0x053D, 0x056D }, // ARMENIAN CAPITAL LETTER XEH
+ { 0x053E, 0x056E }, // ARMENIAN CAPITAL LETTER CA
+ { 0x053F, 0x056F }, // ARMENIAN CAPITAL LETTER KEN
+ { 0x0540, 0x0570 }, // ARMENIAN CAPITAL LETTER HO
+ { 0x0541, 0x0571 }, // ARMENIAN CAPITAL LETTER JA
+ { 0x0542, 0x0572 }, // ARMENIAN CAPITAL LETTER GHAD
+ { 0x0543, 0x0573 }, // ARMENIAN CAPITAL LETTER CHEH
+ { 0x0544, 0x0574 }, // ARMENIAN CAPITAL LETTER MEN
+ { 0x0545, 0x0575 }, // ARMENIAN CAPITAL LETTER YI
+ { 0x0546, 0x0576 }, // ARMENIAN CAPITAL LETTER NOW
+ { 0x0547, 0x0577 }, // ARMENIAN CAPITAL LETTER SHA
+ { 0x0548, 0x0578 }, // ARMENIAN CAPITAL LETTER VO
+ { 0x0549, 0x0579 }, // ARMENIAN CAPITAL LETTER CHA
+ { 0x054A, 0x057A }, // ARMENIAN CAPITAL LETTER PEH
+ { 0x054B, 0x057B }, // ARMENIAN CAPITAL LETTER JHEH
+ { 0x054C, 0x057C }, // ARMENIAN CAPITAL LETTER RA
+ { 0x054D, 0x057D }, // ARMENIAN CAPITAL LETTER SEH
+ { 0x054E, 0x057E }, // ARMENIAN CAPITAL LETTER VEW
+ { 0x054F, 0x057F }, // ARMENIAN CAPITAL LETTER TIWN
+ { 0x0550, 0x0580 }, // ARMENIAN CAPITAL LETTER REH
+ { 0x0551, 0x0581 }, // ARMENIAN CAPITAL LETTER CO
+ { 0x0552, 0x0582 }, // ARMENIAN CAPITAL LETTER YIWN
+ { 0x0553, 0x0583 }, // ARMENIAN CAPITAL LETTER PIWR
+ { 0x0554, 0x0584 }, // ARMENIAN CAPITAL LETTER KEH
+ { 0x0555, 0x0585 }, // ARMENIAN CAPITAL LETTER OH
+ { 0x0556, 0x0586 }, // ARMENIAN CAPITAL LETTER FEH
+ { 0x10A0, 0x2D00 }, // GEORGIAN CAPITAL LETTER AN
+ { 0x10A1, 0x2D01 }, // GEORGIAN CAPITAL LETTER BAN
+ { 0x10A2, 0x2D02 }, // GEORGIAN CAPITAL LETTER GAN
+ { 0x10A3, 0x2D03 }, // GEORGIAN CAPITAL LETTER DON
+ { 0x10A4, 0x2D04 }, // GEORGIAN CAPITAL LETTER EN
+ { 0x10A5, 0x2D05 }, // GEORGIAN CAPITAL LETTER VIN
+ { 0x10A6, 0x2D06 }, // GEORGIAN CAPITAL LETTER ZEN
+ { 0x10A7, 0x2D07 }, // GEORGIAN CAPITAL LETTER TAN
+ { 0x10A8, 0x2D08 }, // GEORGIAN CAPITAL LETTER IN
+ { 0x10A9, 0x2D09 }, // GEORGIAN CAPITAL LETTER KAN
+ { 0x10AA, 0x2D0A }, // GEORGIAN CAPITAL LETTER LAS
+ { 0x10AB, 0x2D0B }, // GEORGIAN CAPITAL LETTER MAN
+ { 0x10AC, 0x2D0C }, // GEORGIAN CAPITAL LETTER NAR
+ { 0x10AD, 0x2D0D }, // GEORGIAN CAPITAL LETTER ON
+ { 0x10AE, 0x2D0E }, // GEORGIAN CAPITAL LETTER PAR
+ { 0x10AF, 0x2D0F }, // GEORGIAN CAPITAL LETTER ZHAR
+ { 0x10B0, 0x2D10 }, // GEORGIAN CAPITAL LETTER RAE
+ { 0x10B1, 0x2D11 }, // GEORGIAN CAPITAL LETTER SAN
+ { 0x10B2, 0x2D12 }, // GEORGIAN CAPITAL LETTER TAR
+ { 0x10B3, 0x2D13 }, // GEORGIAN CAPITAL LETTER UN
+ { 0x10B4, 0x2D14 }, // GEORGIAN CAPITAL LETTER PHAR
+ { 0x10B5, 0x2D15 }, // GEORGIAN CAPITAL LETTER KHAR
+ { 0x10B6, 0x2D16 }, // GEORGIAN CAPITAL LETTER GHAN
+ { 0x10B7, 0x2D17 }, // GEORGIAN CAPITAL LETTER QAR
+ { 0x10B8, 0x2D18 }, // GEORGIAN CAPITAL LETTER SHIN
+ { 0x10B9, 0x2D19 }, // GEORGIAN CAPITAL LETTER CHIN
+ { 0x10BA, 0x2D1A }, // GEORGIAN CAPITAL LETTER CAN
+ { 0x10BB, 0x2D1B }, // GEORGIAN CAPITAL LETTER JIL
+ { 0x10BC, 0x2D1C }, // GEORGIAN CAPITAL LETTER CIL
+ { 0x10BD, 0x2D1D }, // GEORGIAN CAPITAL LETTER CHAR
+ { 0x10BE, 0x2D1E }, // GEORGIAN CAPITAL LETTER XAN
+ { 0x10BF, 0x2D1F }, // GEORGIAN CAPITAL LETTER JHAN
+ { 0x10C0, 0x2D20 }, // GEORGIAN CAPITAL LETTER HAE
+ { 0x10C1, 0x2D21 }, // GEORGIAN CAPITAL LETTER HE
+ { 0x10C2, 0x2D22 }, // GEORGIAN CAPITAL LETTER HIE
+ { 0x10C3, 0x2D23 }, // GEORGIAN CAPITAL LETTER WE
+ { 0x10C4, 0x2D24 }, // GEORGIAN CAPITAL LETTER HAR
+ { 0x10C5, 0x2D25 }, // GEORGIAN CAPITAL LETTER HOE
+ { 0x1E00, 0x1E01 }, // LATIN CAPITAL LETTER A WITH RING BELOW
+ { 0x1E02, 0x1E03 }, // LATIN CAPITAL LETTER B WITH DOT ABOVE
+ { 0x1E04, 0x1E05 }, // LATIN CAPITAL LETTER B WITH DOT BELOW
+ { 0x1E06, 0x1E07 }, // LATIN CAPITAL LETTER B WITH LINE BELOW
+ { 0x1E08, 0x1E09 }, // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+ { 0x1E0A, 0x1E0B }, // LATIN CAPITAL LETTER D WITH DOT ABOVE
+ { 0x1E0C, 0x1E0D }, // LATIN CAPITAL LETTER D WITH DOT BELOW
+ { 0x1E0E, 0x1E0F }, // LATIN CAPITAL LETTER D WITH LINE BELOW
+ { 0x1E10, 0x1E11 }, // LATIN CAPITAL LETTER D WITH CEDILLA
+ { 0x1E12, 0x1E13 }, // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+ { 0x1E14, 0x1E15 }, // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+ { 0x1E16, 0x1E17 }, // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+ { 0x1E18, 0x1E19 }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+ { 0x1E1A, 0x1E1B }, // LATIN CAPITAL LETTER E WITH TILDE BELOW
+ { 0x1E1C, 0x1E1D }, // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+ { 0x1E1E, 0x1E1F }, // LATIN CAPITAL LETTER F WITH DOT ABOVE
+ { 0x1E20, 0x1E21 }, // LATIN CAPITAL LETTER G WITH MACRON
+ { 0x1E22, 0x1E23 }, // LATIN CAPITAL LETTER H WITH DOT ABOVE
+ { 0x1E24, 0x1E25 }, // LATIN CAPITAL LETTER H WITH DOT BELOW
+ { 0x1E26, 0x1E27 }, // LATIN CAPITAL LETTER H WITH DIAERESIS
+ { 0x1E28, 0x1E29 }, // LATIN CAPITAL LETTER H WITH CEDILLA
+ { 0x1E2A, 0x1E2B }, // LATIN CAPITAL LETTER H WITH BREVE BELOW
+ { 0x1E2C, 0x1E2D }, // LATIN CAPITAL LETTER I WITH TILDE BELOW
+ { 0x1E2E, 0x1E2F }, // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+ { 0x1E30, 0x1E31 }, // LATIN CAPITAL LETTER K WITH ACUTE
+ { 0x1E32, 0x1E33 }, // LATIN CAPITAL LETTER K WITH DOT BELOW
+ { 0x1E34, 0x1E35 }, // LATIN CAPITAL LETTER K WITH LINE BELOW
+ { 0x1E36, 0x1E37 }, // LATIN CAPITAL LETTER L WITH DOT BELOW
+ { 0x1E38, 0x1E39 }, // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+ { 0x1E3A, 0x1E3B }, // LATIN CAPITAL LETTER L WITH LINE BELOW
+ { 0x1E3C, 0x1E3D }, // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+ { 0x1E3E, 0x1E3F }, // LATIN CAPITAL LETTER M WITH ACUTE
+ { 0x1E40, 0x1E41 }, // LATIN CAPITAL LETTER M WITH DOT ABOVE
+ { 0x1E42, 0x1E43 }, // LATIN CAPITAL LETTER M WITH DOT BELOW
+ { 0x1E44, 0x1E45 }, // LATIN CAPITAL LETTER N WITH DOT ABOVE
+ { 0x1E46, 0x1E47 }, // LATIN CAPITAL LETTER N WITH DOT BELOW
+ { 0x1E48, 0x1E49 }, // LATIN CAPITAL LETTER N WITH LINE BELOW
+ { 0x1E4A, 0x1E4B }, // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+ { 0x1E4C, 0x1E4D }, // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+ { 0x1E4E, 0x1E4F }, // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+ { 0x1E50, 0x1E51 }, // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+ { 0x1E52, 0x1E53 }, // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+ { 0x1E54, 0x1E55 }, // LATIN CAPITAL LETTER P WITH ACUTE
+ { 0x1E56, 0x1E57 }, // LATIN CAPITAL LETTER P WITH DOT ABOVE
+ { 0x1E58, 0x1E59 }, // LATIN CAPITAL LETTER R WITH DOT ABOVE
+ { 0x1E5A, 0x1E5B }, // LATIN CAPITAL LETTER R WITH DOT BELOW
+ { 0x1E5C, 0x1E5D }, // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+ { 0x1E5E, 0x1E5F }, // LATIN CAPITAL LETTER R WITH LINE BELOW
+ { 0x1E60, 0x1E61 }, // LATIN CAPITAL LETTER S WITH DOT ABOVE
+ { 0x1E62, 0x1E63 }, // LATIN CAPITAL LETTER S WITH DOT BELOW
+ { 0x1E64, 0x1E65 }, // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+ { 0x1E66, 0x1E67 }, // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+ { 0x1E68, 0x1E69 }, // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+ { 0x1E6A, 0x1E6B }, // LATIN CAPITAL LETTER T WITH DOT ABOVE
+ { 0x1E6C, 0x1E6D }, // LATIN CAPITAL LETTER T WITH DOT BELOW
+ { 0x1E6E, 0x1E6F }, // LATIN CAPITAL LETTER T WITH LINE BELOW
+ { 0x1E70, 0x1E71 }, // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+ { 0x1E72, 0x1E73 }, // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+ { 0x1E74, 0x1E75 }, // LATIN CAPITAL LETTER U WITH TILDE BELOW
+ { 0x1E76, 0x1E77 }, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+ { 0x1E78, 0x1E79 }, // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+ { 0x1E7A, 0x1E7B }, // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+ { 0x1E7C, 0x1E7D }, // LATIN CAPITAL LETTER V WITH TILDE
+ { 0x1E7E, 0x1E7F }, // LATIN CAPITAL LETTER V WITH DOT BELOW
+ { 0x1E80, 0x1E81 }, // LATIN CAPITAL LETTER W WITH GRAVE
+ { 0x1E82, 0x1E83 }, // LATIN CAPITAL LETTER W WITH ACUTE
+ { 0x1E84, 0x1E85 }, // LATIN CAPITAL LETTER W WITH DIAERESIS
+ { 0x1E86, 0x1E87 }, // LATIN CAPITAL LETTER W WITH DOT ABOVE
+ { 0x1E88, 0x1E89 }, // LATIN CAPITAL LETTER W WITH DOT BELOW
+ { 0x1E8A, 0x1E8B }, // LATIN CAPITAL LETTER X WITH DOT ABOVE
+ { 0x1E8C, 0x1E8D }, // LATIN CAPITAL LETTER X WITH DIAERESIS
+ { 0x1E8E, 0x1E8F }, // LATIN CAPITAL LETTER Y WITH DOT ABOVE
+ { 0x1E90, 0x1E91 }, // LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+ { 0x1E92, 0x1E93 }, // LATIN CAPITAL LETTER Z WITH DOT BELOW
+ { 0x1E94, 0x1E95 }, // LATIN CAPITAL LETTER Z WITH LINE BELOW
+ { 0x1E9E, 0x00DF }, // LATIN CAPITAL LETTER SHARP S
+ { 0x1EA0, 0x1EA1 }, // LATIN CAPITAL LETTER A WITH DOT BELOW
+ { 0x1EA2, 0x1EA3 }, // LATIN CAPITAL LETTER A WITH HOOK ABOVE
+ { 0x1EA4, 0x1EA5 }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+ { 0x1EA6, 0x1EA7 }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+ { 0x1EA8, 0x1EA9 }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+ { 0x1EAA, 0x1EAB }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+ { 0x1EAC, 0x1EAD }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+ { 0x1EAE, 0x1EAF }, // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+ { 0x1EB0, 0x1EB1 }, // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+ { 0x1EB2, 0x1EB3 }, // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+ { 0x1EB4, 0x1EB5 }, // LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+ { 0x1EB6, 0x1EB7 }, // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+ { 0x1EB8, 0x1EB9 }, // LATIN CAPITAL LETTER E WITH DOT BELOW
+ { 0x1EBA, 0x1EBB }, // LATIN CAPITAL LETTER E WITH HOOK ABOVE
+ { 0x1EBC, 0x1EBD }, // LATIN CAPITAL LETTER E WITH TILDE
+ { 0x1EBE, 0x1EBF }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+ { 0x1EC0, 0x1EC1 }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+ { 0x1EC2, 0x1EC3 }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+ { 0x1EC4, 0x1EC5 }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+ { 0x1EC6, 0x1EC7 }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+ { 0x1EC8, 0x1EC9 }, // LATIN CAPITAL LETTER I WITH HOOK ABOVE
+ { 0x1ECA, 0x1ECB }, // LATIN CAPITAL LETTER I WITH DOT BELOW
+ { 0x1ECC, 0x1ECD }, // LATIN CAPITAL LETTER O WITH DOT BELOW
+ { 0x1ECE, 0x1ECF }, // LATIN CAPITAL LETTER O WITH HOOK ABOVE
+ { 0x1ED0, 0x1ED1 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+ { 0x1ED2, 0x1ED3 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+ { 0x1ED4, 0x1ED5 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+ { 0x1ED6, 0x1ED7 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+ { 0x1ED8, 0x1ED9 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+ { 0x1EDA, 0x1EDB }, // LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+ { 0x1EDC, 0x1EDD }, // LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+ { 0x1EDE, 0x1EDF }, // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+ { 0x1EE0, 0x1EE1 }, // LATIN CAPITAL LETTER O WITH HORN AND TILDE
+ { 0x1EE2, 0x1EE3 }, // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+ { 0x1EE4, 0x1EE5 }, // LATIN CAPITAL LETTER U WITH DOT BELOW
+ { 0x1EE6, 0x1EE7 }, // LATIN CAPITAL LETTER U WITH HOOK ABOVE
+ { 0x1EE8, 0x1EE9 }, // LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+ { 0x1EEA, 0x1EEB }, // LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+ { 0x1EEC, 0x1EED }, // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+ { 0x1EEE, 0x1EEF }, // LATIN CAPITAL LETTER U WITH HORN AND TILDE
+ { 0x1EF0, 0x1EF1 }, // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+ { 0x1EF2, 0x1EF3 }, // LATIN CAPITAL LETTER Y WITH GRAVE
+ { 0x1EF4, 0x1EF5 }, // LATIN CAPITAL LETTER Y WITH DOT BELOW
+ { 0x1EF6, 0x1EF7 }, // LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+ { 0x1EF8, 0x1EF9 }, // LATIN CAPITAL LETTER Y WITH TILDE
+ { 0x1EFA, 0x1EFB }, // LATIN CAPITAL LETTER MIDDLE-WELSH LL
+ { 0x1EFC, 0x1EFD }, // LATIN CAPITAL LETTER MIDDLE-WELSH V
+ { 0x1EFE, 0x1EFF }, // LATIN CAPITAL LETTER Y WITH LOOP
+ { 0x1F08, 0x1F00 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI
+ { 0x1F09, 0x1F01 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA
+ { 0x1F0A, 0x1F02 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+ { 0x1F0B, 0x1F03 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+ { 0x1F0C, 0x1F04 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+ { 0x1F0D, 0x1F05 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+ { 0x1F0E, 0x1F06 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+ { 0x1F0F, 0x1F07 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+ { 0x1F18, 0x1F10 }, // GREEK CAPITAL LETTER EPSILON WITH PSILI
+ { 0x1F19, 0x1F11 }, // GREEK CAPITAL LETTER EPSILON WITH DASIA
+ { 0x1F1A, 0x1F12 }, // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
+ { 0x1F1B, 0x1F13 }, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
+ { 0x1F1C, 0x1F14 }, // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
+ { 0x1F1D, 0x1F15 }, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+ { 0x1F28, 0x1F20 }, // GREEK CAPITAL LETTER ETA WITH PSILI
+ { 0x1F29, 0x1F21 }, // GREEK CAPITAL LETTER ETA WITH DASIA
+ { 0x1F2A, 0x1F22 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+ { 0x1F2B, 0x1F23 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+ { 0x1F2C, 0x1F24 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+ { 0x1F2D, 0x1F25 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+ { 0x1F2E, 0x1F26 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+ { 0x1F2F, 0x1F27 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+ { 0x1F38, 0x1F30 }, // GREEK CAPITAL LETTER IOTA WITH PSILI
+ { 0x1F39, 0x1F31 }, // GREEK CAPITAL LETTER IOTA WITH DASIA
+ { 0x1F3A, 0x1F32 }, // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
+ { 0x1F3B, 0x1F33 }, // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
+ { 0x1F3C, 0x1F34 }, // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
+ { 0x1F3D, 0x1F35 }, // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
+ { 0x1F3E, 0x1F36 }, // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
+ { 0x1F3F, 0x1F37 }, // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+ { 0x1F48, 0x1F40 }, // GREEK CAPITAL LETTER OMICRON WITH PSILI
+ { 0x1F49, 0x1F41 }, // GREEK CAPITAL LETTER OMICRON WITH DASIA
+ { 0x1F4A, 0x1F42 }, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
+ { 0x1F4B, 0x1F43 }, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
+ { 0x1F4C, 0x1F44 }, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
+ { 0x1F4D, 0x1F45 }, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+ { 0x1F59, 0x1F51 }, // GREEK CAPITAL LETTER UPSILON WITH DASIA
+ { 0x1F5B, 0x1F53 }, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+ { 0x1F5D, 0x1F55 }, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+ { 0x1F5F, 0x1F57 }, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+ { 0x1F68, 0x1F60 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI
+ { 0x1F69, 0x1F61 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA
+ { 0x1F6A, 0x1F62 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+ { 0x1F6B, 0x1F63 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+ { 0x1F6C, 0x1F64 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+ { 0x1F6D, 0x1F65 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+ { 0x1F6E, 0x1F66 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+ { 0x1F6F, 0x1F67 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+ { 0x1F88, 0x1F80 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+ { 0x1F89, 0x1F81 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+ { 0x1F8A, 0x1F82 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+ { 0x1F8B, 0x1F83 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+ { 0x1F8C, 0x1F84 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+ { 0x1F8D, 0x1F85 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+ { 0x1F8E, 0x1F86 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+ { 0x1F8F, 0x1F87 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+ { 0x1F98, 0x1F90 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+ { 0x1F99, 0x1F91 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+ { 0x1F9A, 0x1F92 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+ { 0x1F9B, 0x1F93 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+ { 0x1F9C, 0x1F94 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+ { 0x1F9D, 0x1F95 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+ { 0x1F9E, 0x1F96 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+ { 0x1F9F, 0x1F97 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+ { 0x1FA8, 0x1FA0 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+ { 0x1FA9, 0x1FA1 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+ { 0x1FAA, 0x1FA2 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+ { 0x1FAB, 0x1FA3 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+ { 0x1FAC, 0x1FA4 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+ { 0x1FAD, 0x1FA5 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+ { 0x1FAE, 0x1FA6 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+ { 0x1FAF, 0x1FA7 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+ { 0x1FB8, 0x1FB0 }, // GREEK CAPITAL LETTER ALPHA WITH VRACHY
+ { 0x1FB9, 0x1FB1 }, // GREEK CAPITAL LETTER ALPHA WITH MACRON
+ { 0x1FBA, 0x1F70 }, // GREEK CAPITAL LETTER ALPHA WITH VARIA
+ { 0x1FBB, 0x1F71 }, // GREEK CAPITAL LETTER ALPHA WITH OXIA
+ { 0x1FBC, 0x1FB3 }, // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+ { 0x1FC8, 0x1F72 }, // GREEK CAPITAL LETTER EPSILON WITH VARIA
+ { 0x1FC9, 0x1F73 }, // GREEK CAPITAL LETTER EPSILON WITH OXIA
+ { 0x1FCA, 0x1F74 }, // GREEK CAPITAL LETTER ETA WITH VARIA
+ { 0x1FCB, 0x1F75 }, // GREEK CAPITAL LETTER ETA WITH OXIA
+ { 0x1FCC, 0x1FC3 }, // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+ { 0x1FD8, 0x1FD0 }, // GREEK CAPITAL LETTER IOTA WITH VRACHY
+ { 0x1FD9, 0x1FD1 }, // GREEK CAPITAL LETTER IOTA WITH MACRON
+ { 0x1FDA, 0x1F76 }, // GREEK CAPITAL LETTER IOTA WITH VARIA
+ { 0x1FDB, 0x1F77 }, // GREEK CAPITAL LETTER IOTA WITH OXIA
+ { 0x1FE8, 0x1FE0 }, // GREEK CAPITAL LETTER UPSILON WITH VRACHY
+ { 0x1FE9, 0x1FE1 }, // GREEK CAPITAL LETTER UPSILON WITH MACRON
+ { 0x1FEA, 0x1F7A }, // GREEK CAPITAL LETTER UPSILON WITH VARIA
+ { 0x1FEB, 0x1F7B }, // GREEK CAPITAL LETTER UPSILON WITH OXIA
+ { 0x1FEC, 0x1FE5 }, // GREEK CAPITAL LETTER RHO WITH DASIA
+ { 0x1FF8, 0x1F78 }, // GREEK CAPITAL LETTER OMICRON WITH VARIA
+ { 0x1FF9, 0x1F79 }, // GREEK CAPITAL LETTER OMICRON WITH OXIA
+ { 0x1FFA, 0x1F7C }, // GREEK CAPITAL LETTER OMEGA WITH VARIA
+ { 0x1FFB, 0x1F7D }, // GREEK CAPITAL LETTER OMEGA WITH OXIA
+ { 0x1FFC, 0x1FF3 }, // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+ { 0x2126, 0x03C9 }, // OHM SIGN
+ { 0x212A, 0x006B }, // KELVIN SIGN
+ { 0x212B, 0x00E5 }, // ANGSTROM SIGN
+ { 0x2132, 0x214E }, // TURNED CAPITAL F
+ { 0x2160, 0x2170 }, // ROMAN NUMERAL ONE
+ { 0x2161, 0x2171 }, // ROMAN NUMERAL TWO
+ { 0x2162, 0x2172 }, // ROMAN NUMERAL THREE
+ { 0x2163, 0x2173 }, // ROMAN NUMERAL FOUR
+ { 0x2164, 0x2174 }, // ROMAN NUMERAL FIVE
+ { 0x2165, 0x2175 }, // ROMAN NUMERAL SIX
+ { 0x2166, 0x2176 }, // ROMAN NUMERAL SEVEN
+ { 0x2167, 0x2177 }, // ROMAN NUMERAL EIGHT
+ { 0x2168, 0x2178 }, // ROMAN NUMERAL NINE
+ { 0x2169, 0x2179 }, // ROMAN NUMERAL TEN
+ { 0x216A, 0x217A }, // ROMAN NUMERAL ELEVEN
+ { 0x216B, 0x217B }, // ROMAN NUMERAL TWELVE
+ { 0x216C, 0x217C }, // ROMAN NUMERAL FIFTY
+ { 0x216D, 0x217D }, // ROMAN NUMERAL ONE HUNDRED
+ { 0x216E, 0x217E }, // ROMAN NUMERAL FIVE HUNDRED
+ { 0x216F, 0x217F }, // ROMAN NUMERAL ONE THOUSAND
+ { 0x2183, 0x2184 }, // ROMAN NUMERAL REVERSED ONE HUNDRED
+ { 0x24B6, 0x24D0 }, // CIRCLED LATIN CAPITAL LETTER A
+ { 0x24B7, 0x24D1 }, // CIRCLED LATIN CAPITAL LETTER B
+ { 0x24B8, 0x24D2 }, // CIRCLED LATIN CAPITAL LETTER C
+ { 0x24B9, 0x24D3 }, // CIRCLED LATIN CAPITAL LETTER D
+ { 0x24BA, 0x24D4 }, // CIRCLED LATIN CAPITAL LETTER E
+ { 0x24BB, 0x24D5 }, // CIRCLED LATIN CAPITAL LETTER F
+ { 0x24BC, 0x24D6 }, // CIRCLED LATIN CAPITAL LETTER G
+ { 0x24BD, 0x24D7 }, // CIRCLED LATIN CAPITAL LETTER H
+ { 0x24BE, 0x24D8 }, // CIRCLED LATIN CAPITAL LETTER I
+ { 0x24BF, 0x24D9 }, // CIRCLED LATIN CAPITAL LETTER J
+ { 0x24C0, 0x24DA }, // CIRCLED LATIN CAPITAL LETTER K
+ { 0x24C1, 0x24DB }, // CIRCLED LATIN CAPITAL LETTER L
+ { 0x24C2, 0x24DC }, // CIRCLED LATIN CAPITAL LETTER M
+ { 0x24C3, 0x24DD }, // CIRCLED LATIN CAPITAL LETTER N
+ { 0x24C4, 0x24DE }, // CIRCLED LATIN CAPITAL LETTER O
+ { 0x24C5, 0x24DF }, // CIRCLED LATIN CAPITAL LETTER P
+ { 0x24C6, 0x24E0 }, // CIRCLED LATIN CAPITAL LETTER Q
+ { 0x24C7, 0x24E1 }, // CIRCLED LATIN CAPITAL LETTER R
+ { 0x24C8, 0x24E2 }, // CIRCLED LATIN CAPITAL LETTER S
+ { 0x24C9, 0x24E3 }, // CIRCLED LATIN CAPITAL LETTER T
+ { 0x24CA, 0x24E4 }, // CIRCLED LATIN CAPITAL LETTER U
+ { 0x24CB, 0x24E5 }, // CIRCLED LATIN CAPITAL LETTER V
+ { 0x24CC, 0x24E6 }, // CIRCLED LATIN CAPITAL LETTER W
+ { 0x24CD, 0x24E7 }, // CIRCLED LATIN CAPITAL LETTER X
+ { 0x24CE, 0x24E8 }, // CIRCLED LATIN CAPITAL LETTER Y
+ { 0x24CF, 0x24E9 }, // CIRCLED LATIN CAPITAL LETTER Z
+ { 0x2C00, 0x2C30 }, // GLAGOLITIC CAPITAL LETTER AZU
+ { 0x2C01, 0x2C31 }, // GLAGOLITIC CAPITAL LETTER BUKY
+ { 0x2C02, 0x2C32 }, // GLAGOLITIC CAPITAL LETTER VEDE
+ { 0x2C03, 0x2C33 }, // GLAGOLITIC CAPITAL LETTER GLAGOLI
+ { 0x2C04, 0x2C34 }, // GLAGOLITIC CAPITAL LETTER DOBRO
+ { 0x2C05, 0x2C35 }, // GLAGOLITIC CAPITAL LETTER YESTU
+ { 0x2C06, 0x2C36 }, // GLAGOLITIC CAPITAL LETTER ZHIVETE
+ { 0x2C07, 0x2C37 }, // GLAGOLITIC CAPITAL LETTER DZELO
+ { 0x2C08, 0x2C38 }, // GLAGOLITIC CAPITAL LETTER ZEMLJA
+ { 0x2C09, 0x2C39 }, // GLAGOLITIC CAPITAL LETTER IZHE
+ { 0x2C0A, 0x2C3A }, // GLAGOLITIC CAPITAL LETTER INITIAL IZHE
+ { 0x2C0B, 0x2C3B }, // GLAGOLITIC CAPITAL LETTER I
+ { 0x2C0C, 0x2C3C }, // GLAGOLITIC CAPITAL LETTER DJERVI
+ { 0x2C0D, 0x2C3D }, // GLAGOLITIC CAPITAL LETTER KAKO
+ { 0x2C0E, 0x2C3E }, // GLAGOLITIC CAPITAL LETTER LJUDIJE
+ { 0x2C0F, 0x2C3F }, // GLAGOLITIC CAPITAL LETTER MYSLITE
+ { 0x2C10, 0x2C40 }, // GLAGOLITIC CAPITAL LETTER NASHI
+ { 0x2C11, 0x2C41 }, // GLAGOLITIC CAPITAL LETTER ONU
+ { 0x2C12, 0x2C42 }, // GLAGOLITIC CAPITAL LETTER POKOJI
+ { 0x2C13, 0x2C43 }, // GLAGOLITIC CAPITAL LETTER RITSI
+ { 0x2C14, 0x2C44 }, // GLAGOLITIC CAPITAL LETTER SLOVO
+ { 0x2C15, 0x2C45 }, // GLAGOLITIC CAPITAL LETTER TVRIDO
+ { 0x2C16, 0x2C46 }, // GLAGOLITIC CAPITAL LETTER UKU
+ { 0x2C17, 0x2C47 }, // GLAGOLITIC CAPITAL LETTER FRITU
+ { 0x2C18, 0x2C48 }, // GLAGOLITIC CAPITAL LETTER HERU
+ { 0x2C19, 0x2C49 }, // GLAGOLITIC CAPITAL LETTER OTU
+ { 0x2C1A, 0x2C4A }, // GLAGOLITIC CAPITAL LETTER PE
+ { 0x2C1B, 0x2C4B }, // GLAGOLITIC CAPITAL LETTER SHTA
+ { 0x2C1C, 0x2C4C }, // GLAGOLITIC CAPITAL LETTER TSI
+ { 0x2C1D, 0x2C4D }, // GLAGOLITIC CAPITAL LETTER CHRIVI
+ { 0x2C1E, 0x2C4E }, // GLAGOLITIC CAPITAL LETTER SHA
+ { 0x2C1F, 0x2C4F }, // GLAGOLITIC CAPITAL LETTER YERU
+ { 0x2C20, 0x2C50 }, // GLAGOLITIC CAPITAL LETTER YERI
+ { 0x2C21, 0x2C51 }, // GLAGOLITIC CAPITAL LETTER YATI
+ { 0x2C22, 0x2C52 }, // GLAGOLITIC CAPITAL LETTER SPIDERY HA
+ { 0x2C23, 0x2C53 }, // GLAGOLITIC CAPITAL LETTER YU
+ { 0x2C24, 0x2C54 }, // GLAGOLITIC CAPITAL LETTER SMALL YUS
+ { 0x2C25, 0x2C55 }, // GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
+ { 0x2C26, 0x2C56 }, // GLAGOLITIC CAPITAL LETTER YO
+ { 0x2C27, 0x2C57 }, // GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
+ { 0x2C28, 0x2C58 }, // GLAGOLITIC CAPITAL LETTER BIG YUS
+ { 0x2C29, 0x2C59 }, // GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
+ { 0x2C2A, 0x2C5A }, // GLAGOLITIC CAPITAL LETTER FITA
+ { 0x2C2B, 0x2C5B }, // GLAGOLITIC CAPITAL LETTER IZHITSA
+ { 0x2C2C, 0x2C5C }, // GLAGOLITIC CAPITAL LETTER SHTAPIC
+ { 0x2C2D, 0x2C5D }, // GLAGOLITIC CAPITAL LETTER TROKUTASTI A
+ { 0x2C2E, 0x2C5E }, // GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+ { 0x2C60, 0x2C61 }, // LATIN CAPITAL LETTER L WITH DOUBLE BAR
+ { 0x2C62, 0x026B }, // LATIN CAPITAL LETTER L WITH MIDDLE TILDE
+ { 0x2C63, 0x1D7D }, // LATIN CAPITAL LETTER P WITH STROKE
+ { 0x2C64, 0x027D }, // LATIN CAPITAL LETTER R WITH TAIL
+ { 0x2C67, 0x2C68 }, // LATIN CAPITAL LETTER H WITH DESCENDER
+ { 0x2C69, 0x2C6A }, // LATIN CAPITAL LETTER K WITH DESCENDER
+ { 0x2C6B, 0x2C6C }, // LATIN CAPITAL LETTER Z WITH DESCENDER
+ { 0x2C6D, 0x0251 }, // LATIN CAPITAL LETTER ALPHA
+ { 0x2C6E, 0x0271 }, // LATIN CAPITAL LETTER M WITH HOOK
+ { 0x2C6F, 0x0250 }, // LATIN CAPITAL LETTER TURNED A
+ { 0x2C70, 0x0252 }, // LATIN CAPITAL LETTER TURNED ALPHA
+ { 0x2C72, 0x2C73 }, // LATIN CAPITAL LETTER W WITH HOOK
+ { 0x2C75, 0x2C76 }, // LATIN CAPITAL LETTER HALF H
+ { 0x2C7E, 0x023F }, // LATIN CAPITAL LETTER S WITH SWASH TAIL
+ { 0x2C7F, 0x0240 }, // LATIN CAPITAL LETTER Z WITH SWASH TAIL
+ { 0x2C80, 0x2C81 }, // COPTIC CAPITAL LETTER ALFA
+ { 0x2C82, 0x2C83 }, // COPTIC CAPITAL LETTER VIDA
+ { 0x2C84, 0x2C85 }, // COPTIC CAPITAL LETTER GAMMA
+ { 0x2C86, 0x2C87 }, // COPTIC CAPITAL LETTER DALDA
+ { 0x2C88, 0x2C89 }, // COPTIC CAPITAL LETTER EIE
+ { 0x2C8A, 0x2C8B }, // COPTIC CAPITAL LETTER SOU
+ { 0x2C8C, 0x2C8D }, // COPTIC CAPITAL LETTER ZATA
+ { 0x2C8E, 0x2C8F }, // COPTIC CAPITAL LETTER HATE
+ { 0x2C90, 0x2C91 }, // COPTIC CAPITAL LETTER THETHE
+ { 0x2C92, 0x2C93 }, // COPTIC CAPITAL LETTER IAUDA
+ { 0x2C94, 0x2C95 }, // COPTIC CAPITAL LETTER KAPA
+ { 0x2C96, 0x2C97 }, // COPTIC CAPITAL LETTER LAULA
+ { 0x2C98, 0x2C99 }, // COPTIC CAPITAL LETTER MI
+ { 0x2C9A, 0x2C9B }, // COPTIC CAPITAL LETTER NI
+ { 0x2C9C, 0x2C9D }, // COPTIC CAPITAL LETTER KSI
+ { 0x2C9E, 0x2C9F }, // COPTIC CAPITAL LETTER O
+ { 0x2CA0, 0x2CA1 }, // COPTIC CAPITAL LETTER PI
+ { 0x2CA2, 0x2CA3 }, // COPTIC CAPITAL LETTER RO
+ { 0x2CA4, 0x2CA5 }, // COPTIC CAPITAL LETTER SIMA
+ { 0x2CA6, 0x2CA7 }, // COPTIC CAPITAL LETTER TAU
+ { 0x2CA8, 0x2CA9 }, // COPTIC CAPITAL LETTER UA
+ { 0x2CAA, 0x2CAB }, // COPTIC CAPITAL LETTER FI
+ { 0x2CAC, 0x2CAD }, // COPTIC CAPITAL LETTER KHI
+ { 0x2CAE, 0x2CAF }, // COPTIC CAPITAL LETTER PSI
+ { 0x2CB0, 0x2CB1 }, // COPTIC CAPITAL LETTER OOU
+ { 0x2CB2, 0x2CB3 }, // COPTIC CAPITAL LETTER DIALECT-P ALEF
+ { 0x2CB4, 0x2CB5 }, // COPTIC CAPITAL LETTER OLD COPTIC AIN
+ { 0x2CB6, 0x2CB7 }, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
+ { 0x2CB8, 0x2CB9 }, // COPTIC CAPITAL LETTER DIALECT-P KAPA
+ { 0x2CBA, 0x2CBB }, // COPTIC CAPITAL LETTER DIALECT-P NI
+ { 0x2CBC, 0x2CBD }, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
+ { 0x2CBE, 0x2CBF }, // COPTIC CAPITAL LETTER OLD COPTIC OOU
+ { 0x2CC0, 0x2CC1 }, // COPTIC CAPITAL LETTER SAMPI
+ { 0x2CC2, 0x2CC3 }, // COPTIC CAPITAL LETTER CROSSED SHEI
+ { 0x2CC4, 0x2CC5 }, // COPTIC CAPITAL LETTER OLD COPTIC SHEI
+ { 0x2CC6, 0x2CC7 }, // COPTIC CAPITAL LETTER OLD COPTIC ESH
+ { 0x2CC8, 0x2CC9 }, // COPTIC CAPITAL LETTER AKHMIMIC KHEI
+ { 0x2CCA, 0x2CCB }, // COPTIC CAPITAL LETTER DIALECT-P HORI
+ { 0x2CCC, 0x2CCD }, // COPTIC CAPITAL LETTER OLD COPTIC HORI
+ { 0x2CCE, 0x2CCF }, // COPTIC CAPITAL LETTER OLD COPTIC HA
+ { 0x2CD0, 0x2CD1 }, // COPTIC CAPITAL LETTER L-SHAPED HA
+ { 0x2CD2, 0x2CD3 }, // COPTIC CAPITAL LETTER OLD COPTIC HEI
+ { 0x2CD4, 0x2CD5 }, // COPTIC CAPITAL LETTER OLD COPTIC HAT
+ { 0x2CD6, 0x2CD7 }, // COPTIC CAPITAL LETTER OLD COPTIC GANGIA
+ { 0x2CD8, 0x2CD9 }, // COPTIC CAPITAL LETTER OLD COPTIC DJA
+ { 0x2CDA, 0x2CDB }, // COPTIC CAPITAL LETTER OLD COPTIC SHIMA
+ { 0x2CDC, 0x2CDD }, // COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
+ { 0x2CDE, 0x2CDF }, // COPTIC CAPITAL LETTER OLD NUBIAN NGI
+ { 0x2CE0, 0x2CE1 }, // COPTIC CAPITAL LETTER OLD NUBIAN NYI
+ { 0x2CE2, 0x2CE3 }, // COPTIC CAPITAL LETTER OLD NUBIAN WAU
+ { 0x2CEB, 0x2CEC }, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
+ { 0x2CED, 0x2CEE }, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
+ { 0xA640, 0xA641 }, // CYRILLIC CAPITAL LETTER ZEMLYA
+ { 0xA642, 0xA643 }, // CYRILLIC CAPITAL LETTER DZELO
+ { 0xA644, 0xA645 }, // CYRILLIC CAPITAL LETTER REVERSED DZE
+ { 0xA646, 0xA647 }, // CYRILLIC CAPITAL LETTER IOTA
+ { 0xA648, 0xA649 }, // CYRILLIC CAPITAL LETTER DJERV
+ { 0xA64A, 0xA64B }, // CYRILLIC CAPITAL LETTER MONOGRAPH UK
+ { 0xA64C, 0xA64D }, // CYRILLIC CAPITAL LETTER BROAD OMEGA
+ { 0xA64E, 0xA64F }, // CYRILLIC CAPITAL LETTER NEUTRAL YER
+ { 0xA650, 0xA651 }, // CYRILLIC CAPITAL LETTER YERU WITH BACK YER
+ { 0xA652, 0xA653 }, // CYRILLIC CAPITAL LETTER IOTIFIED YAT
+ { 0xA654, 0xA655 }, // CYRILLIC CAPITAL LETTER REVERSED YU
+ { 0xA656, 0xA657 }, // CYRILLIC CAPITAL LETTER IOTIFIED A
+ { 0xA658, 0xA659 }, // CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
+ { 0xA65A, 0xA65B }, // CYRILLIC CAPITAL LETTER BLENDED YUS
+ { 0xA65C, 0xA65D }, // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
+ { 0xA65E, 0xA65F }, // CYRILLIC CAPITAL LETTER YN
+ { 0xA660, 0xA661 }, // CYRILLIC CAPITAL LETTER REVERSED TSE
+ { 0xA662, 0xA663 }, // CYRILLIC CAPITAL LETTER SOFT DE
+ { 0xA664, 0xA665 }, // CYRILLIC CAPITAL LETTER SOFT EL
+ { 0xA666, 0xA667 }, // CYRILLIC CAPITAL LETTER SOFT EM
+ { 0xA668, 0xA669 }, // CYRILLIC CAPITAL LETTER MONOCULAR O
+ { 0xA66A, 0xA66B }, // CYRILLIC CAPITAL LETTER BINOCULAR O
+ { 0xA66C, 0xA66D }, // CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
+ { 0xA680, 0xA681 }, // CYRILLIC CAPITAL LETTER DWE
+ { 0xA682, 0xA683 }, // CYRILLIC CAPITAL LETTER DZWE
+ { 0xA684, 0xA685 }, // CYRILLIC CAPITAL LETTER ZHWE
+ { 0xA686, 0xA687 }, // CYRILLIC CAPITAL LETTER CCHE
+ { 0xA688, 0xA689 }, // CYRILLIC CAPITAL LETTER DZZE
+ { 0xA68A, 0xA68B }, // CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
+ { 0xA68C, 0xA68D }, // CYRILLIC CAPITAL LETTER TWE
+ { 0xA68E, 0xA68F }, // CYRILLIC CAPITAL LETTER TSWE
+ { 0xA690, 0xA691 }, // CYRILLIC CAPITAL LETTER TSSE
+ { 0xA692, 0xA693 }, // CYRILLIC CAPITAL LETTER TCHE
+ { 0xA694, 0xA695 }, // CYRILLIC CAPITAL LETTER HWE
+ { 0xA696, 0xA697 }, // CYRILLIC CAPITAL LETTER SHWE
+ { 0xA722, 0xA723 }, // LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
+ { 0xA724, 0xA725 }, // LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
+ { 0xA726, 0xA727 }, // LATIN CAPITAL LETTER HENG
+ { 0xA728, 0xA729 }, // LATIN CAPITAL LETTER TZ
+ { 0xA72A, 0xA72B }, // LATIN CAPITAL LETTER TRESILLO
+ { 0xA72C, 0xA72D }, // LATIN CAPITAL LETTER CUATRILLO
+ { 0xA72E, 0xA72F }, // LATIN CAPITAL LETTER CUATRILLO WITH COMMA
+ { 0xA732, 0xA733 }, // LATIN CAPITAL LETTER AA
+ { 0xA734, 0xA735 }, // LATIN CAPITAL LETTER AO
+ { 0xA736, 0xA737 }, // LATIN CAPITAL LETTER AU
+ { 0xA738, 0xA739 }, // LATIN CAPITAL LETTER AV
+ { 0xA73A, 0xA73B }, // LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
+ { 0xA73C, 0xA73D }, // LATIN CAPITAL LETTER AY
+ { 0xA73E, 0xA73F }, // LATIN CAPITAL LETTER REVERSED C WITH DOT
+ { 0xA740, 0xA741 }, // LATIN CAPITAL LETTER K WITH STROKE
+ { 0xA742, 0xA743 }, // LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
+ { 0xA744, 0xA745 }, // LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
+ { 0xA746, 0xA747 }, // LATIN CAPITAL LETTER BROKEN L
+ { 0xA748, 0xA749 }, // LATIN CAPITAL LETTER L WITH HIGH STROKE
+ { 0xA74A, 0xA74B }, // LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
+ { 0xA74C, 0xA74D }, // LATIN CAPITAL LETTER O WITH LOOP
+ { 0xA74E, 0xA74F }, // LATIN CAPITAL LETTER OO
+ { 0xA750, 0xA751 }, // LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
+ { 0xA752, 0xA753 }, // LATIN CAPITAL LETTER P WITH FLOURISH
+ { 0xA754, 0xA755 }, // LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
+ { 0xA756, 0xA757 }, // LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
+ { 0xA758, 0xA759 }, // LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
+ { 0xA75A, 0xA75B }, // LATIN CAPITAL LETTER R ROTUNDA
+ { 0xA75C, 0xA75D }, // LATIN CAPITAL LETTER RUM ROTUNDA
+ { 0xA75E, 0xA75F }, // LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
+ { 0xA760, 0xA761 }, // LATIN CAPITAL LETTER VY
+ { 0xA762, 0xA763 }, // LATIN CAPITAL LETTER VISIGOTHIC Z
+ { 0xA764, 0xA765 }, // LATIN CAPITAL LETTER THORN WITH STROKE
+ { 0xA766, 0xA767 }, // LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
+ { 0xA768, 0xA769 }, // LATIN CAPITAL LETTER VEND
+ { 0xA76A, 0xA76B }, // LATIN CAPITAL LETTER ET
+ { 0xA76C, 0xA76D }, // LATIN CAPITAL LETTER IS
+ { 0xA76E, 0xA76F }, // LATIN CAPITAL LETTER CON
+ { 0xA779, 0xA77A }, // LATIN CAPITAL LETTER INSULAR D
+ { 0xA77B, 0xA77C }, // LATIN CAPITAL LETTER INSULAR F
+ { 0xA77D, 0x1D79 }, // LATIN CAPITAL LETTER INSULAR G
+ { 0xA77E, 0xA77F }, // LATIN CAPITAL LETTER TURNED INSULAR G
+ { 0xA780, 0xA781 }, // LATIN CAPITAL LETTER TURNED L
+ { 0xA782, 0xA783 }, // LATIN CAPITAL LETTER INSULAR R
+ { 0xA784, 0xA785 }, // LATIN CAPITAL LETTER INSULAR S
+ { 0xA786, 0xA787 }, // LATIN CAPITAL LETTER INSULAR T
+ { 0xA78B, 0xA78C }, // LATIN CAPITAL LETTER SALTILLO
+ { 0xA78D, 0x0265 }, // LATIN CAPITAL LETTER TURNED H
+ { 0xA790, 0xA791 }, // LATIN CAPITAL LETTER N WITH DESCENDER
+ { 0xA7A0, 0xA7A1 }, // LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
+ { 0xA7A2, 0xA7A3 }, // LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
+ { 0xA7A4, 0xA7A5 }, // LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
+ { 0xA7A6, 0xA7A7 }, // LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
+ { 0xA7A8, 0xA7A9 }, // LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
+ { 0xFF21, 0xFF41 }, // FULLWIDTH LATIN CAPITAL LETTER A
+ { 0xFF22, 0xFF42 }, // FULLWIDTH LATIN CAPITAL LETTER B
+ { 0xFF23, 0xFF43 }, // FULLWIDTH LATIN CAPITAL LETTER C
+ { 0xFF24, 0xFF44 }, // FULLWIDTH LATIN CAPITAL LETTER D
+ { 0xFF25, 0xFF45 }, // FULLWIDTH LATIN CAPITAL LETTER E
+ { 0xFF26, 0xFF46 }, // FULLWIDTH LATIN CAPITAL LETTER F
+ { 0xFF27, 0xFF47 }, // FULLWIDTH LATIN CAPITAL LETTER G
+ { 0xFF28, 0xFF48 }, // FULLWIDTH LATIN CAPITAL LETTER H
+ { 0xFF29, 0xFF49 }, // FULLWIDTH LATIN CAPITAL LETTER I
+ { 0xFF2A, 0xFF4A }, // FULLWIDTH LATIN CAPITAL LETTER J
+ { 0xFF2B, 0xFF4B }, // FULLWIDTH LATIN CAPITAL LETTER K
+ { 0xFF2C, 0xFF4C }, // FULLWIDTH LATIN CAPITAL LETTER L
+ { 0xFF2D, 0xFF4D }, // FULLWIDTH LATIN CAPITAL LETTER M
+ { 0xFF2E, 0xFF4E }, // FULLWIDTH LATIN CAPITAL LETTER N
+ { 0xFF2F, 0xFF4F }, // FULLWIDTH LATIN CAPITAL LETTER O
+ { 0xFF30, 0xFF50 }, // FULLWIDTH LATIN CAPITAL LETTER P
+ { 0xFF31, 0xFF51 }, // FULLWIDTH LATIN CAPITAL LETTER Q
+ { 0xFF32, 0xFF52 }, // FULLWIDTH LATIN CAPITAL LETTER R
+ { 0xFF33, 0xFF53 }, // FULLWIDTH LATIN CAPITAL LETTER S
+ { 0xFF34, 0xFF54 }, // FULLWIDTH LATIN CAPITAL LETTER T
+ { 0xFF35, 0xFF55 }, // FULLWIDTH LATIN CAPITAL LETTER U
+ { 0xFF36, 0xFF56 }, // FULLWIDTH LATIN CAPITAL LETTER V
+ { 0xFF37, 0xFF57 }, // FULLWIDTH LATIN CAPITAL LETTER W
+ { 0xFF38, 0xFF58 }, // FULLWIDTH LATIN CAPITAL LETTER X
+ { 0xFF39, 0xFF59 }, // FULLWIDTH LATIN CAPITAL LETTER Y
+ { 0xFF3A, 0xFF5A } // FULLWIDTH LATIN CAPITAL LETTER Z
+};
+
+static int compare_pair_capital(const void *a, const void *b) {  // comparator for bsearch()
+    const int key = *static_cast<const unsigned short *>(a);  // a: the code unit being looked up
+    return key - static_cast<int>(static_cast<const struct LatinCapitalSmallPair *>(b)->capital);
+}
+
+/* static */ unsigned short CharUtils::latin_tolower(const unsigned short c) {
+    // Look c up among the capitals of SORTED_CHAR_MAP; when it has no entry, c is returned as is.
+    const void *const hit = bsearch(&c, SORTED_CHAR_MAP, NELEMS(SORTED_CHAR_MAP),
+            sizeof(SORTED_CHAR_MAP[0]), compare_pair_capital);
+    return hit ? static_cast<const struct LatinCapitalSmallPair *>(hit)->small : c;
+}
+
+/*
+ * Table mapping most combined Latin, Greek, and Cyrillic characters to their base
+ * characters. The index is the code point c: CharUtils::BASE_CHARS[c] == c if c is not a
+ * combined character, or the base character if it is combined. Each row below holds eight
+ * entries and is labelled with its first code point; "Manually changed" notes flag hand edits.
+ *
+ * Generated with:
+ * cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; \
+ * $base[hex($foo[0])] = hex($foo[5]);} \
+ * for ($i = 0; $i < 0x500; $i += 8) { printf("/" . "* U+%04X *" . "/ ", $i); \
+ * for ($j = $i; $j < $i + 8; $j++) { \
+ * printf("0x%04X, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
+ */
+/* static */ const unsigned short CharUtils::BASE_CHARS[CharUtils::BASE_CHARS_SIZE] = {
+ /* U+0000 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+ /* U+0008 */ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
+ /* U+0010 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+ /* U+0018 */ 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
+ /* U+0020 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+ /* U+0028 */ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+ /* U+0030 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ /* U+0038 */ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+ /* U+0040 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+ /* U+0048 */ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+ /* U+0050 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+ /* U+0058 */ 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+ /* U+0060 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ /* U+0068 */ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+ /* U+0070 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ /* U+0078 */ 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
+ /* U+0080 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ /* U+0088 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ /* U+0090 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ /* U+0098 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ /* U+00A0 */ 0x0020, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+ /* U+00A8 */ 0x0020, 0x00A9, 0x0061, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0020,
+ /* U+00B0 */ 0x00B0, 0x00B1, 0x0032, 0x0033, 0x0020, 0x03BC, 0x00B6, 0x00B7,
+ /* U+00B8 */ 0x0020, 0x0031, 0x006F, 0x00BB, 0x0031, 0x0031, 0x0033, 0x00BF,
+ /* U+00C0 */ 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043,
+ /* U+00C8 */ 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
+ /* U+00D0 */ 0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00D7,
+ /* U+00D8 */ 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0073,
+ // U+00D8: Manually changed from 00D8 to 004F
+ // TODO: Check if it's really acceptable to consider Ø a diacritical variant of O
+ // U+00DF: Manually changed from 00DF to 0073
+ /* U+00E0 */ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00E6, 0x0063,
+ /* U+00E8 */ 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
+ /* U+00F0 */ 0x00F0, 0x006E, 0x006F, 0x006F, 0x006F, 0x006F, 0x006F, 0x00F7,
+ /* U+00F8 */ 0x006F, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00FE, 0x0079,
+ // U+00F8: Manually changed from 00F8 to 006F
+ // TODO: Check if it's really acceptable to consider ø a diacritical variant of o
+ /* U+0100 */ 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
+ /* U+0108 */ 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
+ /* U+0110 */ 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
+ /* U+0118 */ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
+ /* U+0120 */ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
+ /* U+0128 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
+ /* U+0130 */ 0x0049, 0x0049, 0x0049, 0x0069, 0x004A, 0x006A, 0x004B, 0x006B,
+ // U+0131: Manually changed from 0131 to 0049
+ /* U+0138 */ 0x0138, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C,
+ /* U+0140 */ 0x006C, 0x004C, 0x006C, 0x004E, 0x006E, 0x004E, 0x006E, 0x004E,
+ // U+0141: Manually changed from 0141 to 004C
+ // U+0142: Manually changed from 0142 to 006C
+ /* U+0148 */ 0x006E, 0x02BC, 0x014A, 0x014B, 0x004F, 0x006F, 0x004F, 0x006F,
+ /* U+0150 */ 0x004F, 0x006F, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
+ /* U+0158 */ 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
+ /* U+0160 */ 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
+ /* U+0168 */ 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
+ /* U+0170 */ 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
+ /* U+0178 */ 0x0059, 0x005A, 0x007A, 0x005A, 0x007A, 0x005A, 0x007A, 0x0073,
+ /* U+0180 */ 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
+ /* U+0188 */ 0x0188, 0x0189, 0x018A, 0x018B, 0x018C, 0x018D, 0x018E, 0x018F,
+ /* U+0190 */ 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
+ /* U+0198 */ 0x0198, 0x0199, 0x019A, 0x019B, 0x019C, 0x019D, 0x019E, 0x019F,
+ /* U+01A0 */ 0x004F, 0x006F, 0x01A2, 0x01A3, 0x01A4, 0x01A5, 0x01A6, 0x01A7,
+ /* U+01A8 */ 0x01A8, 0x01A9, 0x01AA, 0x01AB, 0x01AC, 0x01AD, 0x01AE, 0x0055,
+ /* U+01B0 */ 0x0075, 0x01B1, 0x01B2, 0x01B3, 0x01B4, 0x01B5, 0x01B6, 0x01B7,
+ /* U+01B8 */ 0x01B8, 0x01B9, 0x01BA, 0x01BB, 0x01BC, 0x01BD, 0x01BE, 0x01BF,
+ /* U+01C0 */ 0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x0044, 0x0044, 0x0064, 0x004C,
+ /* U+01C8 */ 0x004C, 0x006C, 0x004E, 0x004E, 0x006E, 0x0041, 0x0061, 0x0049,
+ /* U+01D0 */ 0x0069, 0x004F, 0x006F, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055,
+ // U+01D5: Manually changed from 00DC to 0055
+ // U+01D6: Manually changed from 00FC to 0075
+ // U+01D7: Manually changed from 00DC to 0055
+ /* U+01D8 */ 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x01DD, 0x0041, 0x0061,
+ // U+01D8: Manually changed from 00FC to 0075
+ // U+01D9: Manually changed from 00DC to 0055
+ // U+01DA: Manually changed from 00FC to 0075
+ // U+01DB: Manually changed from 00DC to 0055
+ // U+01DC: Manually changed from 00FC to 0075
+ // U+01DE: Manually changed from 00C4 to 0041
+ // U+01DF: Manually changed from 00E4 to 0061
+ /* U+01E0 */ 0x0041, 0x0061, 0x00C6, 0x00E6, 0x01E4, 0x01E5, 0x0047, 0x0067,
+ // U+01E0: Manually changed from 0226 to 0041
+ // U+01E1: Manually changed from 0227 to 0061
+ /* U+01E8 */ 0x004B, 0x006B, 0x004F, 0x006F, 0x004F, 0x006F, 0x01B7, 0x0292,
+ // U+01EC: Manually changed from 01EA to 004F
+ // U+01ED: Manually changed from 01EB to 006F
+ /* U+01F0 */ 0x006A, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01F6, 0x01F7,
+ /* U+01F8 */ 0x004E, 0x006E, 0x0041, 0x0061, 0x00C6, 0x00E6, 0x004F, 0x006F,
+ // U+01FA: Manually changed from 00C5 to 0041
+ // U+01FB: Manually changed from 00E5 to 0061
+ // U+01FE: Manually changed from 00D8 to 004F
+ // TODO: Check if it's really acceptable to consider Ø a diacritical variant of O
+ // U+01FF: Manually changed from 00F8 to 006F
+ // TODO: Check if it's really acceptable to consider ø a diacritical variant of o
+ /* U+0200 */ 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
+ /* U+0208 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x004F, 0x006F, 0x004F, 0x006F,
+ /* U+0210 */ 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
+ /* U+0218 */ 0x0053, 0x0073, 0x0054, 0x0074, 0x021C, 0x021D, 0x0048, 0x0068,
+ /* U+0220 */ 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
+ /* U+0228 */ 0x0045, 0x0065, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F,
+ // U+022A: Manually changed from 00D6 to 004F
+ // U+022B: Manually changed from 00F6 to 006F
+ // U+022C: Manually changed from 00D5 to 004F
+ // U+022D: Manually changed from 00F5 to 006F
+ /* U+0230 */ 0x004F, 0x006F, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
+ // U+0230: Manually changed from 022E to 004F
+ // U+0231: Manually changed from 022F to 006F
+ /* U+0238 */ 0x0238, 0x0239, 0x023A, 0x023B, 0x023C, 0x023D, 0x023E, 0x023F,
+ /* U+0240 */ 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
+ /* U+0248 */ 0x0248, 0x0249, 0x024A, 0x024B, 0x024C, 0x024D, 0x024E, 0x024F,
+ /* U+0250 */ 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
+ /* U+0258 */ 0x0258, 0x0259, 0x025A, 0x025B, 0x025C, 0x025D, 0x025E, 0x025F,
+ /* U+0260 */ 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
+ /* U+0268 */ 0x0268, 0x0269, 0x026A, 0x026B, 0x026C, 0x026D, 0x026E, 0x026F,
+ /* U+0270 */ 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
+ /* U+0278 */ 0x0278, 0x0279, 0x027A, 0x027B, 0x027C, 0x027D, 0x027E, 0x027F,
+ /* U+0280 */ 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
+ /* U+0288 */ 0x0288, 0x0289, 0x028A, 0x028B, 0x028C, 0x028D, 0x028E, 0x028F,
+ /* U+0290 */ 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
+ /* U+0298 */ 0x0298, 0x0299, 0x029A, 0x029B, 0x029C, 0x029D, 0x029E, 0x029F,
+ /* U+02A0 */ 0x02A0, 0x02A1, 0x02A2, 0x02A3, 0x02A4, 0x02A5, 0x02A6, 0x02A7,
+ /* U+02A8 */ 0x02A8, 0x02A9, 0x02AA, 0x02AB, 0x02AC, 0x02AD, 0x02AE, 0x02AF,
+ /* U+02B0 */ 0x0068, 0x0266, 0x006A, 0x0072, 0x0279, 0x027B, 0x0281, 0x0077,
+ /* U+02B8 */ 0x0079, 0x02B9, 0x02BA, 0x02BB, 0x02BC, 0x02BD, 0x02BE, 0x02BF,
+ /* U+02C0 */ 0x02C0, 0x02C1, 0x02C2, 0x02C3, 0x02C4, 0x02C5, 0x02C6, 0x02C7,
+ /* U+02C8 */ 0x02C8, 0x02C9, 0x02CA, 0x02CB, 0x02CC, 0x02CD, 0x02CE, 0x02CF,
+ /* U+02D0 */ 0x02D0, 0x02D1, 0x02D2, 0x02D3, 0x02D4, 0x02D5, 0x02D6, 0x02D7,
+ /* U+02D8 */ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02DE, 0x02DF,
+ /* U+02E0 */ 0x0263, 0x006C, 0x0073, 0x0078, 0x0295, 0x02E5, 0x02E6, 0x02E7,
+ /* U+02E8 */ 0x02E8, 0x02E9, 0x02EA, 0x02EB, 0x02EC, 0x02ED, 0x02EE, 0x02EF,
+ /* U+02F0 */ 0x02F0, 0x02F1, 0x02F2, 0x02F3, 0x02F4, 0x02F5, 0x02F6, 0x02F7,
+ /* U+02F8 */ 0x02F8, 0x02F9, 0x02FA, 0x02FB, 0x02FC, 0x02FD, 0x02FE, 0x02FF,
+ /* U+0300 */ 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
+ /* U+0308 */ 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, 0x030D, 0x030E, 0x030F,
+ /* U+0310 */ 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
+ /* U+0318 */ 0x0318, 0x0319, 0x031A, 0x031B, 0x031C, 0x031D, 0x031E, 0x031F,
+ /* U+0320 */ 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
+ /* U+0328 */ 0x0328, 0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E, 0x032F,
+ /* U+0330 */ 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
+ /* U+0338 */ 0x0338, 0x0339, 0x033A, 0x033B, 0x033C, 0x033D, 0x033E, 0x033F,
+ /* U+0340 */ 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
+ /* U+0348 */ 0x0348, 0x0349, 0x034A, 0x034B, 0x034C, 0x034D, 0x034E, 0x034F,
+ /* U+0350 */ 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
+ /* U+0358 */ 0x0358, 0x0359, 0x035A, 0x035B, 0x035C, 0x035D, 0x035E, 0x035F,
+ /* U+0360 */ 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
+ /* U+0368 */ 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
+ /* U+0370 */ 0x0370, 0x0371, 0x0372, 0x0373, 0x02B9, 0x0375, 0x0376, 0x0377,
+ /* U+0378 */ 0x0378, 0x0379, 0x0020, 0x037B, 0x037C, 0x037D, 0x003B, 0x037F,
+ /* U+0380 */ 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00A8, 0x0391, 0x00B7,
+ /* U+0388 */ 0x0395, 0x0397, 0x0399, 0x038B, 0x039F, 0x038D, 0x03A5, 0x03A9,
+ /* U+0390 */ 0x03CA, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
+ /* U+0398 */ 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
+ /* U+03A0 */ 0x03A0, 0x03A1, 0x03A2, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
+ /* U+03A8 */ 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x03B1, 0x03B5, 0x03B7, 0x03B9,
+ /* U+03B0 */ 0x03CB, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
+ /* U+03B8 */ 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
+ /* U+03C0 */ 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
+ /* U+03C8 */ 0x03C8, 0x03C9, 0x03B9, 0x03C5, 0x03BF, 0x03C5, 0x03C9, 0x03CF,
+ /* U+03D0 */ 0x03B2, 0x03B8, 0x03A5, 0x03D2, 0x03D2, 0x03C6, 0x03C0, 0x03D7,
+ /* U+03D8 */ 0x03D8, 0x03D9, 0x03DA, 0x03DB, 0x03DC, 0x03DD, 0x03DE, 0x03DF,
+ /* U+03E0 */ 0x03E0, 0x03E1, 0x03E2, 0x03E3, 0x03E4, 0x03E5, 0x03E6, 0x03E7,
+ /* U+03E8 */ 0x03E8, 0x03E9, 0x03EA, 0x03EB, 0x03EC, 0x03ED, 0x03EE, 0x03EF,
+ /* U+03F0 */ 0x03BA, 0x03C1, 0x03C2, 0x03F3, 0x0398, 0x03B5, 0x03F6, 0x03F7,
+ /* U+03F8 */ 0x03F8, 0x03A3, 0x03FA, 0x03FB, 0x03FC, 0x03FD, 0x03FE, 0x03FF,
+ /* U+0400 */ 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
+ /* U+0408 */ 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
+ /* U+0410 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
+ /* U+0418 */ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
+ // U+0419: Manually changed from 0418 to 0419
+ /* U+0420 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
+ /* U+0428 */ 0x0428, 0x0429, 0x042C, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
+ // U+042A: Manually changed from 042A to 042C
+ /* U+0430 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+ /* U+0438 */ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
+ // U+0439: Manually changed from 0438 to 0439
+ /* U+0440 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
+ /* U+0448 */ 0x0448, 0x0449, 0x044C, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
+ // U+044A: Manually changed from 044A to 044C
+ /* U+0450 */ 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
+ /* U+0458 */ 0x0458, 0x0459, 0x045A, 0x045B, 0x043A, 0x0438, 0x0443, 0x045F,
+ /* U+0460 */ 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
+ /* U+0468 */ 0x0468, 0x0469, 0x046A, 0x046B, 0x046C, 0x046D, 0x046E, 0x046F,
+ /* U+0470 */ 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
+ /* U+0478 */ 0x0478, 0x0479, 0x047A, 0x047B, 0x047C, 0x047D, 0x047E, 0x047F,
+ /* U+0480 */ 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
+ /* U+0488 */ 0x0488, 0x0489, 0x048A, 0x048B, 0x048C, 0x048D, 0x048E, 0x048F,
+ /* U+0490 */ 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
+ /* U+0498 */ 0x0498, 0x0499, 0x049A, 0x049B, 0x049C, 0x049D, 0x049E, 0x049F,
+ /* U+04A0 */ 0x04A0, 0x04A1, 0x04A2, 0x04A3, 0x04A4, 0x04A5, 0x04A6, 0x04A7,
+ /* U+04A8 */ 0x04A8, 0x04A9, 0x04AA, 0x04AB, 0x04AC, 0x04AD, 0x04AE, 0x04AF,
+ /* U+04B0 */ 0x04B0, 0x04B1, 0x04B2, 0x04B3, 0x04B4, 0x04B5, 0x04B6, 0x04B7,
+ /* U+04B8 */ 0x04B8, 0x04B9, 0x04BA, 0x04BB, 0x04BC, 0x04BD, 0x04BE, 0x04BF,
+ /* U+04C0 */ 0x04C0, 0x0416, 0x0436, 0x04C3, 0x04C4, 0x04C5, 0x04C6, 0x04C7,
+ /* U+04C8 */ 0x04C8, 0x04C9, 0x04CA, 0x04CB, 0x04CC, 0x04CD, 0x04CE, 0x04CF,
+ /* U+04D0 */ 0x0410, 0x0430, 0x0410, 0x0430, 0x04D4, 0x04D5, 0x0415, 0x0435,
+ /* U+04D8 */ 0x04D8, 0x04D9, 0x04D8, 0x04D9, 0x0416, 0x0436, 0x0417, 0x0437,
+ /* U+04E0 */ 0x04E0, 0x04E1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041E, 0x043E,
+ /* U+04E8 */ 0x04E8, 0x04E9, 0x04E8, 0x04E9, 0x042D, 0x044D, 0x0423, 0x0443,
+ /* U+04F0 */ 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04F6, 0x04F7,
+ /* U+04F8 */ 0x042B, 0x044B, 0x04FA, 0x04FB, 0x04FC, 0x04FD, 0x04FE, 0x04FF,
+};
+
+/* static */ const std::vector<int> CharUtils::EMPTY_STRING(1 /* size */, '\0' /* lone NUL */);
+} // namespace latinime
diff --git a/third_party/android_prediction/utils/char_utils.h b/third_party/android_prediction/utils/char_utils.h
new file mode 100644
index 0000000..dc44efb
--- /dev/null
+++ b/third_party/android_prediction/utils/char_utils.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_CHAR_UTILS_H
+#define LATINIME_CHAR_UTILS_H
+
+#include <cctype>
+#include <cstring>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+// Static helpers for classifying and normalizing code points (case folding, base characters).
+class CharUtils {
+ public:
+    static AK_FORCE_INLINE bool isAsciiUpper(int c) {
+        // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
+        // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
+        return (c >= 'A' && c <= 'Z');
+    }
+
+    // Shifts 'A'..'Z' onto 'a'..'z'. No range check is made here; see isAsciiUpper().
+    static AK_FORCE_INLINE int toAsciiLower(int c) {
+        return c - 'A' + 'a';
+    }
+
+    // True for 0x00..0x7F. Uses a mask because standard <cctype> does not declare isascii().
+    static AK_FORCE_INLINE bool isAscii(int c) {
+        return (c & ~0x7F) == 0;
+    }
+
+    // Lower-cases c. Negative values and code points above 0xFFFF are returned unchanged, since
+    // the unsigned short parameter of latin_tolower() cannot represent them.
+    static AK_FORCE_INLINE int toLowerCase(const int c) {
+        if (isAsciiUpper(c)) {
+            return toAsciiLower(c);
+        }
+        if (isAscii(c) || c < 0 || c > 0xFFFF) {
+            return c;
+        }
+        return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
+    }
+
+    static AK_FORCE_INLINE int toBaseLowerCase(const int c) {
+        return toLowerCase(toBaseCodePoint(c));
+    }
+
+    static AK_FORCE_INLINE bool isIntentionalOmissionCodePoint(const int codePoint) {
+        // TODO: Do not hardcode here
+        return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
+    }
+
+    // Counts the code points that precede the first '\0', reading at most arraySize entries.
+    static AK_FORCE_INLINE int getCodePointCount(const int arraySize, const int *const codePoints) {
+        int size = 0;
+        while (size < arraySize && codePoints[size] != '\0') {
+            ++size;
+        }
+        return size;
+    }
+
+    // Looks c up in BASE_CHARS; out-of-table values, negative ones included, are returned as is.
+    static AK_FORCE_INLINE int toBaseCodePoint(int c) {
+        if (c >= 0 && c < BASE_CHARS_SIZE) {
+            return static_cast<int>(BASE_CHARS[c]);
+        }
+        return c;
+    }
+
+    static AK_FORCE_INLINE int getSpaceCount(const int *const codePointBuffer, const int length) {
+        int spaceCount = 0;
+        for (int i = 0; i < length; ++i) {
+            spaceCount += (codePointBuffer[i] == KEYCODE_SPACE) ? 1 : 0;
+        }
+        return spaceCount;
+    }
+
+    static AK_FORCE_INLINE int isInUnicodeSpace(const int codePoint) {
+        return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT;
+    }
+
+    static unsigned short latin_tolower(const unsigned short c);
+    static const std::vector<int> EMPTY_STRING;
+
+    // Returns updated code point count. Returns 0 when the code points cannot be marked as a
+    // Beginning-of-Sentence, i.e. when codePointCount has already reached maxCodePoint.
+    static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints,
+            const int codePointCount, const int maxCodePoint) {
+        if (codePointCount > 0 && codePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
+            return codePointCount;  // Marker has already been attached.
+        }
+        if (codePointCount >= maxCodePoint) {
+            return 0;  // codePointCount + 1 would exceed maxCodePoint.
+        }
+        memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount);
+        codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE;
+        return codePointCount + 1;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
+
+    static const int MIN_UNICODE_CODE_POINT;
+    static const int MAX_UNICODE_CODE_POINT;
+
+    /**
+     * Table mapping most combined Latin, Greek, and Cyrillic characters to their base characters.
+     * If c is in range, BASE_CHARS[c] == c if c is not a combined character, or the base
+     * character if it is combined.
+     */
+    static const int BASE_CHARS_SIZE = 0x0500;
+    static const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
+};
+} // namespace latinime
+#endif // LATINIME_CHAR_UTILS_H
diff --git a/third_party/android_prediction/utils/int_array_view.h b/third_party/android_prediction/utils/int_array_view.h
new file mode 100644
index 0000000..1d31fd6
--- /dev/null
+++ b/third_party/android_prediction/utils/int_array_view.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_INT_ARRAY_VIEW_H
+#define LATINIME_INT_ARRAY_VIEW_H
+
+#include <cstdint>
+#include <cstdlib>
+#include <vector>
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+/**
+ * Helper class used to provide a read-only view of a given range of integer array. This class
+ * does not take ownership of the underlying integer array but is designed to be a lightweight
+ * object that obeys value semantics.
+ *
+ * Example:
+ * <code>
+ * bool containsX(IntArrayView view) {
+ * for (size_t i = 0; i < view.size(); ++i) {
+ * if (view[i] == 'X') {
+ * return true;
+ * }
+ * }
+ * return false;
+ * }
+ *
+ * const int codePointArray[] = { 'A', 'B', 'X', 'Z' };
+ * auto view = IntArrayView(codePointArray, NELEMS(codePointArray));
+ * const bool hasX = containsX(view);
+ * </code>
+ */
+class IntArrayView {
+ public:
+    // Creates an empty view: null pointer, zero elements.
+    IntArrayView() : IntArrayView(nullptr, 0) {}
+
+    // Views the size ints that start at ptr. The array is neither copied nor owned.
+    IntArrayView(const int *const ptr, const size_t size) : mPtr(ptr), mSize(size) {}
+
+    // Views the elements of vector. vector.data() and vector.size() are read once, here, so the
+    // view does not follow later changes to the vector.
+    explicit IntArrayView(const std::vector<int> &vector)
+            : IntArrayView(vector.data(), vector.size()) {}
+
+    // Views all N elements of a built-in array; N is deduced from the argument.
+    template <int N>
+    AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) {
+        return IntArrayView(array, N);
+    }
+
+    // Returns a view that points to one int object. Does not take ownership of the given object.
+    AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) {
+        return IntArrayView(object, 1);
+    }
+
+    // Returns the element at index. The bound is checked only by the ASSERT below.
+    AK_FORCE_INLINE int operator[](const size_t index) const {
+        ASSERT(index < mSize);
+        return mPtr[index];
+    }
+
+    // Number of viewed elements.
+    AK_FORCE_INLINE size_t size() const { return mSize; }
+
+    // True when the view covers no elements.
+    AK_FORCE_INLINE bool empty() const { return mSize == 0; }
+
+    // Pointer to the first viewed element (null for a default-constructed view).
+    AK_FORCE_INLINE const int *data() const { return mPtr; }
+
+    // First viewed element; with end() this makes the view usable in a range-based for loop.
+    AK_FORCE_INLINE const int *begin() const { return mPtr; }
+    // One past the last viewed element.
+    AK_FORCE_INLINE const int *end() const { return mPtr + mSize; }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);
+
+    // Both members are const: what a view refers to is fixed at construction.
+    const int *const mPtr;  // first viewed element; not owned
+    const size_t mSize;  // number of viewed elements
+};
+
+using WordIdArrayView = IntArrayView;  // plain alias: same type, no extra type checking
+using PtNodePosArrayView = IntArrayView;  // plain alias: same type, no extra type checking
+
+} // namespace latinime
+#endif // LATINIME_INT_ARRAY_VIEW_H
diff --git a/third_party/android_prediction/utils/time_keeper.cpp b/third_party/android_prediction/utils/time_keeper.cpp
new file mode 100644
index 0000000..abd929e
--- /dev/null
+++ b/third_party/android_prediction/utils/time_keeper.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/utils/time_keeper.h"
+
+#include <ctime>
+
+namespace latinime {
+
+int TimeKeeper::sCurrentTime = 0;  // value handed out by peekCurrentTime()
+bool TimeKeeper::sSetForTesting = false;  // while true, setCurrentTime() changes nothing
+
+// Refreshes sCurrentTime from time(), except while a test has frozen it.
+/* static */ void TimeKeeper::setCurrentTime() {
+    if (sSetForTesting) return;
+    sCurrentTime = static_cast<int>(time(nullptr));
+}
+
+/* static */ void TimeKeeper::startTestModeWithForceCurrentTime(const int currentTime) {
+    sSetForTesting = true;  // setCurrentTime() now leaves sCurrentTime alone
+    sCurrentTime = currentTime;  // the value peekCurrentTime() will report
+}
+
+/* static */ void TimeKeeper::stopTestMode() {
+ sSetForTesting = false;  // setCurrentTime() may update sCurrentTime again; the value is kept
+}
+
+} // namespace latinime
diff --git a/third_party/android_prediction/utils/time_keeper.h b/third_party/android_prediction/utils/time_keeper.h
new file mode 100644
index 0000000..bfed409
--- /dev/null
+++ b/third_party/android_prediction/utils/time_keeper.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TIME_KEEPER_H
+#define LATINIME_TIME_KEEPER_H
+
+#include "third_party/android_prediction/defines.h"
+
+namespace latinime {
+
+class TimeKeeper {
+ public:
+    // Re-reads time(0) into the stored value, unless test mode is active.
+    static void setCurrentTime();
+    // Stores currentTime and switches test mode on, which freezes the stored value.
+    static void startTestModeWithForceCurrentTime(const int currentTime);
+    // Switches test mode off. The stored value stays until the next setCurrentTime().
+    static void stopTestMode();
+    // Returns the stored value as is; the clock is not consulted.
+    static int peekCurrentTime() { return sCurrentTime; }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(TimeKeeper);
+    static bool sSetForTesting;
+    static int sCurrentTime;
+};
+} // namespace latinime
+#endif /* LATINIME_TIME_KEEPER_H */
diff --git a/third_party/freetype b/third_party/freetype
new file mode 160000
index 0000000..fd6919a
--- /dev/null
+++ b/third_party/freetype
@@ -0,0 +1 @@
+Subproject commit fd6919ac23f74b876c209aba5eaa2be662086391