Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ads/learned-zindex
1 result
Show changes
Commits on Source (2)
/**
* @file train_test_zindex.cpp
* @author Sachith (sachith.pai@helsinki.fi)
* @brief File to train and test the flood index
* @version 0.1
* @date 2022-05-04
*
* @copyright Copyright (c) 2022
*
*/
#include<iostream>
#include <vector>
#include <algorithm>
#include <time.h>
#include <random>
#include <chrono>
#include <fstream>
#include <stdlib.h>
#include<set>
#include<map>
#include<string>
#include"floodlite.h"
#include "../toml11-master/toml.hpp"
#include "../json.hpp"
using namespace std;
using json = nlohmann::json; // using this to dump various logs.
#define addeb if(0)
const string data_path = "../Datasets/";
int main(int argc, char* argv[])
{
const auto data_folder = string(argv[2]);
const auto experiment_name = string(argv[3]);
const auto config = toml::parse(data_path+data_folder+"/Experiments/"+experiment_name+"/config/"+string(argv[1])+".toml");
const auto point_class = toml::find<std::string>(config, "point_class");
const auto point_file = toml::find<std::string>(config, "point_file");
const auto query_file = toml::find<std::string>(config, "query_file");
cout.precision(17);
vector<Point> data;
double_t a, b, c, d;
ifstream pointsfile(data_path+data_folder+"/DataPoints/"+point_class+"/"+point_file);
while ( pointsfile >> a >> b)
data.push_back(Point(a,b));
pointsfile.close();
cout<<data_path+data_folder+"/DataPoints/"+point_class+"/"+point_file<<endl;
uint32_t insert_increments = uint32_t(data.size()*0.1);
cout<<"Finishe reading data "<<data.size()<<endl;
vector<pair<Point,Point>> queries;
ifstream queriesfile(data_path+data_folder+"/Queries/RangeQueries/"+query_file);
while (queriesfile >> a >> b >> c >> d)
queries.push_back(make_pair(Point(a,b),Point(c,d)));
queriesfile.close();
uint32_t insert_rq_size = uint32_t(queries.size()*0.05);
int page_size = toml::find<std::int32_t>(config, "page_size");
cout<<"Finishe reading data n queries "<<data.size()<<" , "<<queries.size()<<"\n";
cout.flush();
auto flood_train_start = std::chrono::high_resolution_clock::now();
FloodLite flood_obj = FloodLite(data,queries,page_size);
auto flood_train_end = std::chrono::high_resolution_clock::now();
cout<<"Finished training FLOOD"<<endl;
//############ FLOOD ############
{
json flood_json;
flood_json["model"]="FLOOD";
flood_json["query_file"] = query_file;
flood_json["point_class"] = point_class;
flood_json["point_file"] = point_file;
flood_json["build_time"] = chrono::duration_cast<chrono::seconds>(flood_train_end - flood_train_start).count();
flood_json["config_id"] = string(argv[1]);
uint64_t result_size =0;
auto flood_eval_start = std::chrono::high_resolution_clock::now();
for(auto &query: queries){
vector<Point> range_query_result = flood_obj.RangeQuery(query);
result_size+=range_query_result.size();
}
auto flood_eval_end = std::chrono::high_resolution_clock::now();
flood_json["range_result_size"]=result_size;
flood_json["page_count"]=flood_obj.page_cnt_;
flood_json["node_count"]=0;
flood_json["index_size"]=flood_obj.ModelSize();
flood_json["range_query_time"] = chrono::duration_cast<chrono::nanoseconds>(flood_eval_end - flood_eval_start).count()/queries.size();
flood_json["range_query_scantime"] = flood_obj.TimeSpentScanningPages()/queries.size();
flood_json["range_query_page_accessed"]=flood_obj.NumPagesAcessed()/queries.size();
flood_json["range_query_points_scanned"]=flood_obj.NumElementsScanned()/queries.size();
ofstream o(data_path+data_folder+"/Experiments/"+experiment_name+"/result/Range.json",ios_base::app);
o << flood_json << std::endl;
o.close();
}
cout<<"argv[4] :"<<argv[4]<<endl;
if(atoi(argv[4])){
vector<Point> knn_queries;
ifstream knn_queriesfile(data_path+data_folder+"/Queries/KnnQueries/"+point_class);
while (knn_queriesfile >> a >> b)
knn_queries.push_back(Point(a,b));
knn_queriesfile.close();
vector<uint32_t> k_values = toml::find<std::vector<uint32_t>>(config, "knn_k_values");
vector<Point> point_queries;
ifstream point_queriesfile(data_path+data_folder+"/Queries/PointQueries/"+point_class);
while (point_queriesfile >> a >> b)
point_queries.push_back(Point(a,b));
point_queriesfile.close();
vector<Point> insert_queries;
ifstream insert_queriesfile(data_path+data_folder+"/Queries/InsertQueries/"+point_class);
while (insert_queriesfile >> a >> b)
insert_queries.push_back(Point(a,b));
insert_queriesfile.close();
{ // KNN querys
json flood_json;
flood_json["model"]="FLOOD";
flood_json["point_class"] = point_class;
flood_json["point_file"] = point_file;
flood_json["config_id"] = string(argv[1]);
vector<Point> knn_query_result;
for(auto &k : k_values){
flood_obj.ClearMetric();
auto flood_eval_start = std::chrono::high_resolution_clock::now();
for(auto &query: knn_queries){
knn_query_result = flood_obj.KNNQuery(query,k);
}
auto flood_eval_end = std::chrono::high_resolution_clock::now();
flood_json["k"]=k;
flood_json["knn_query_time"]=chrono::duration_cast<chrono::nanoseconds>(flood_eval_end - flood_eval_start).count()/knn_queries.size();
flood_json["knn_query_scantime"] = flood_obj.TimeSpentScanningPages()/knn_queries.size();
flood_json["knn_query_page_accessed"]=flood_obj.NumPagesAcessed()/knn_queries.size();
flood_json["knn_query_points_scanned"]=flood_obj.NumElementsScanned()/knn_queries.size();
ofstream o(data_path+data_folder+"/Experiments/"+experiment_name+"/result/KNN.json",ios_base::app);
o << flood_json << std::endl;
o.close();
}
}
{ // Point queries
json flood_json;
flood_json["model"]="FLOOD";
flood_json["point_class"] = point_class;
flood_json["point_file"] = point_file;
flood_json["config_id"] = string(argv[1]);
bool point_query_result;
auto flood_eval_start = std::chrono::high_resolution_clock::now();
for(auto &query: point_queries){
point_query_result = flood_obj.PointQuery(query);
}
auto flood_eval_end = std::chrono::high_resolution_clock::now();
flood_json["point_query_time"]=chrono::duration_cast<chrono::nanoseconds>(flood_eval_end - flood_eval_start).count()/point_queries.size();
ofstream o(data_path+data_folder+"/Experiments/"+experiment_name+"/result/Point.json",ios_base::app);
o << flood_json << std::endl;
o.close();
}
cout<<"Point and KNN query"<<endl;
{ // Insert queries
json flood_json;
flood_json["model"]="FLOOD";
flood_json["point_class"] = point_class;
flood_json["point_file"] = point_file;
flood_json["config_id"] = string(argv[1]);
vector<uint64_t> insert_times;
vector<uint64_t> range_query_times;
uint32_t ins_ix=0;
for(int ins_epoch =0;ins_epoch<5;ins_epoch++){
auto flood_eval_start = std::chrono::high_resolution_clock::now();
for(int j=0;j<insert_increments;j++,ins_ix++){
// cout<<insert_queries[ins_ix].x_<<" "<<insert_queries[ins_ix].y_<<endl;
flood_obj.InsertElement(insert_queries[ins_ix]);
}
auto flood_eval_end = std::chrono::high_resolution_clock::now();
insert_times.push_back(chrono::duration_cast<chrono::nanoseconds>(flood_eval_end - flood_eval_start).count()/insert_increments);
// cout<<" ############## EPOCH: "<<ins_epoch<<" Done ############## "<<endl;
auto flood_rq_eval_start = std::chrono::high_resolution_clock::now();
for(int i=0;i<insert_rq_size;i++){
vector<Point> range_query_result = flood_obj.RangeQuery(queries[i]);
}
auto flood_rq_eval_end = std::chrono::high_resolution_clock::now();
range_query_times.push_back(chrono::duration_cast<chrono::nanoseconds>(flood_rq_eval_end - flood_rq_eval_start).count()/insert_rq_size);
}
flood_json["insert_query_time"]= insert_times;
flood_json["range_query_times"]= range_query_times;
ofstream o(data_path+data_folder+"/Experiments/"+experiment_name+"/result/Insert.json",ios_base::app);
o << flood_json << std::endl;
o.close();
}
}
cout<<"FLOOD DONE\n";
cout.flush();
return 0;
}
\ No newline at end of file
This diff is collapsed.
/**
* @file train_test_zindex.cpp
* @author Sachith (sachith.pai@helsinki.fi)
* @brief File to train and test the zindex.
* @version 0.1
* @date 2022-05-04
*
* @copyright Copyright (c) 2022
*
*/
#include<iostream>
#include <vector>
#include <algorithm>
#include <time.h>
#include <random>
#include <chrono>
#include <fstream>
#include <stdlib.h>
#include<set>
#include<map>
#include<string>
#include"quilts.h"
#include "../toml11-master/toml.hpp"
#include "../json.hpp"
using namespace std;
using json = nlohmann::json; // using this to dump various logs.
const string data_path = "../Datasets/";
// Rank-space record: .first holds the raw (x, y) coordinates, .second holds
// the point's (rank-in-x, rank-in-y) once rankspaceprojection has run.
// A type alias replaces the previous function-like macro use of #define:
// same type, but it respects scoping and shows up in diagnostics.
using pddii = std::pair<std::pair<std::double_t,std::double_t>,std::pair<std::uint32_t,std::uint32_t>>;

/// @brief Orders two records by their raw x coordinate.
bool sortbyfirst(const pddii &a, const pddii &b){return (a.first.first < b.first.first);}

/// @brief Orders two records by their raw y coordinate.
bool sortbysecond(const pddii &a, const pddii &b){return (a.first.second < b.first.second);}

/**
 * @brief Fills in each record's per-dimension ranks.
 *
 * Sorts by x and writes each element's position into .second.first, then
 * sorts by y and writes the position into .second.second. On return the
 * array is therefore sorted by the raw y coordinate.
 *
 * @param arr records to project; mutated (ranks written, order changed).
 */
void rankspaceprojection(std::vector<pddii> &arr){
    std::sort(arr.begin(), arr.end(), sortbyfirst);
    // size_t index avoids the signed/unsigned comparison of the old int loop;
    // the explicit cast documents the narrowing into the 32-bit rank field.
    for(std::size_t ix = 0; ix < arr.size(); ++ix)
        arr[ix].second.first = static_cast<std::uint32_t>(ix);
    std::sort(arr.begin(), arr.end(), sortbysecond);
    for(std::size_t ix = 0; ix < arr.size(); ++ix)
        arr[ix].second.second = static_cast<std::uint32_t>(ix);
}
int main(int argc, char* argv[])
{
const auto data_folder = string(argv[2]);
const auto experiment_name = string(argv[3]);
const auto config = toml::parse(data_path+data_folder+"/Experiments/"+experiment_name+"/config/"+string(argv[1])+".toml");
const auto point_class = toml::find<std::string>(config, "point_class");
const auto point_file = toml::find<std::string>(config, "point_file");
const auto query_file = toml::find<std::string>(config, "query_file");
cout.precision(17);
vector<pddii> data_raw;
vector<double_t> x_values;
vector<double_t> y_values;
double_t a, b, c, d;
ifstream pointsfile(data_path+data_folder+"/DataPoints/"+point_class+"/"+point_file);
while ( pointsfile >> a >> b){
data_raw.push_back(make_pair(make_pair(a,b),make_pair(0,0)));
x_values.push_back(a);
y_values.push_back(b);
}
pointsfile.close();
rankspaceprojection(data_raw);
sort(x_values.begin(), x_values.end());
sort(y_values.begin(), y_values.end());
vector<Point> data;
for(int i=0;i<data_raw.size();i++){
data.push_back(Point(data_raw[i].second.first,data_raw[i].second.second));
}
uint32_t insert_increments = uint32_t(data.size()*0.1);
cout<<"Finishe reading data "<<data.size()<<endl;
vector<pair<Point,Point>> queries;
ifstream queriesfile(data_path+data_folder+"/Queries/RangeQueries/"+query_file);
while (queriesfile >> a >> b >> c >> d){
uint32_t rank_a = (lower_bound(x_values.begin(),x_values.end(),a)-x_values.begin());
uint32_t rank_b = (lower_bound(y_values.begin(),y_values.end(),b)-y_values.begin());
uint32_t rank_c = (lower_bound(x_values.begin(),x_values.end(),c)-x_values.begin());
uint32_t rank_d = (lower_bound(y_values.begin(),y_values.end(),d)-y_values.begin());
queries.push_back(make_pair(Point(rank_a,rank_b),Point(rank_c,rank_d)));
}
uint32_t insert_rq_size = uint32_t(queries.size()*0.05);
queriesfile.close();
int page_size = toml::find<std::int32_t>(config, "page_size");
cout<<"Finishe reading data n queries "<<data.size()<<" , "<<queries.size()<<"\n";
cout.flush();
auto quilts_train_start = std::chrono::high_resolution_clock::now();
Quilts quilts_obj = Quilts(data,queries,page_size);
auto quilts_train_end = std::chrono::high_resolution_clock::now();
cout<<"Finished training QUILTS"<<endl;
//############ QUILTS ############
{ //range query
json quilts_json;
quilts_json["model"]="QUILTS";
quilts_json["query_file"] = query_file;
quilts_json["point_class"] = point_class;
quilts_json["point_file"] = point_file;
quilts_json["build_time"] = chrono::duration_cast<chrono::seconds>(quilts_train_end - quilts_train_start).count();
quilts_json["config_id"] = string(argv[1]);
uint64_t result_size =0;
auto quilts_eval_start = std::chrono::high_resolution_clock::now();
for(auto &query: queries){
vector<Point> range_query_result = quilts_obj.RangeQuery(query);
result_size+=range_query_result.size();
}
auto quilts_eval_end = std::chrono::high_resolution_clock::now();
quilts_json["range_result_size"]=result_size;
quilts_json["page_count"]=quilts_obj.page_cnt_;
quilts_json["node_count"]=quilts_obj.node_cnt_;
quilts_json["index_size"]=quilts_obj.ModelSize();
quilts_json["range_query_time"] = chrono::duration_cast<chrono::nanoseconds>(quilts_eval_end - quilts_eval_start).count()/queries.size();
quilts_json["range_query_scantime"] = quilts_obj.TimeSpentScanningPages()/queries.size();
quilts_json["range_query_page_accessed"]=quilts_obj.NumPagesAcessed()/queries.size();
quilts_json["range_query_points_scanned"]=quilts_obj.NumElementsScanned()/queries.size();
ofstream o(data_path+data_folder+"/Experiments/"+experiment_name+"/result/Range.json",ios_base::app);
o << quilts_json << std::endl;
o.close();
}
cout<<"Finished Range query QUILTS"<<endl;
if(atoi(argv[4])){
vector<Point> knn_queries;
ifstream knn_queriesfile(data_path+data_folder+"/Queries/KnnQueries/"+point_class);
while (knn_queriesfile >> a >> b){
uint32_t rank_a = (lower_bound(x_values.begin(),x_values.end(),a)-x_values.begin());
uint32_t rank_b = (lower_bound(y_values.begin(),y_values.end(),b)-y_values.begin());
knn_queries.push_back(Point(rank_a,rank_b));
}
knn_queriesfile.close();
vector<uint32_t> k_values = toml::find<std::vector<uint32_t>>(config, "knn_k_values");
cout<<"Finished Reading KNN QUILTS"<<endl;
vector<Point> point_queries;
ifstream point_queriesfile(data_path+data_folder+"/Queries/PointQueries/"+point_class);
while (point_queriesfile >> a >> b){
uint32_t rank_a = (lower_bound(x_values.begin(),x_values.end(),a)-x_values.begin());
uint32_t rank_b = (lower_bound(y_values.begin(),y_values.end(),b)-y_values.begin());
point_queries.push_back(Point(rank_a,rank_b));
}
point_queriesfile.close();
cout<<"Finished reading Point QUILTS"<<endl;
vector<Point> insert_queries;
ifstream insert_queriesfile(data_path+data_folder+"/Queries/InsertQueries/"+point_class);
while (insert_queriesfile >> a >> b){
uint32_t rank_a = (lower_bound(x_values.begin(),x_values.end(),a)-x_values.begin());
uint32_t rank_b = (lower_bound(y_values.begin(),y_values.end(),b)-y_values.begin());
insert_queries.push_back(Point(rank_a,rank_b));
}
insert_queriesfile.close();
cout<<"Finished reading insert QUILTS"<<endl;
cout<<"Starting Knn"<<endl;
{ // KNN querys
json quilts_json;
quilts_json["model"]="QUILTS";
quilts_json["point_class"] = point_class;
quilts_json["point_file"] = point_file;
quilts_json["config_id"] = string(argv[1]);
vector<Point> knn_query_result;
for(auto &k : k_values){
quilts_json["k"] = k;
quilts_obj.ClearMetric();
auto quilts_eval_start = std::chrono::high_resolution_clock::now();
for(auto &query: knn_queries){
// cout<<"KNN query "<<query.x_<<" "<<query.y_<<endl;
knn_query_result = quilts_obj.KNNQuery(query,k);
}
auto quilts_eval_end = std::chrono::high_resolution_clock::now();
quilts_json["knn_query_time"]=chrono::duration_cast<chrono::nanoseconds>(quilts_eval_end - quilts_eval_start).count()/knn_queries.size();
quilts_json["knn_query_scantime"] = quilts_obj.TimeSpentScanningPages()/knn_queries.size();
quilts_json["knn_query_page_accessed"]=quilts_obj.NumPagesAcessed()/knn_queries.size();
quilts_json["knn_query_points_scanned"]=quilts_obj.NumElementsScanned()/knn_queries.size();
std::cout<<"QUILTS knn k:"<<k<<std::endl;
ofstream o(data_path+data_folder+"/Experiments/"+experiment_name+"/result/KNN.json",ios_base::app);
o << quilts_json << std::endl;
o.close();
}
}
cout<<"KNN DOne"<<endl;
{ // Point queries
json quilts_json;
quilts_json["model"]="QUILTS";
quilts_json["point_class"] = point_class;
quilts_json["point_file"] = point_file;
quilts_json["config_id"] = string(argv[1]);
bool point_query_result;
auto quilts_eval_start = std::chrono::high_resolution_clock::now();
for(auto &query: point_queries){
point_query_result = quilts_obj.PointQuery(query);
}
auto quilts_eval_end = std::chrono::high_resolution_clock::now();
quilts_json["point_query_time"]=chrono::duration_cast<chrono::nanoseconds>(quilts_eval_end - quilts_eval_start).count()/point_queries.size();
ofstream o(data_path+data_folder+"/Experiments/"+experiment_name+"/result/Point.json",ios_base::app);
o << quilts_json << std::endl;
o.close();
}
cout<<"Point Queries DOne"<<endl;
{ // Insert queries
json quilts_json;
quilts_json["model"]="QUILTS";
quilts_json["point_class"] = point_class;
quilts_json["point_file"] = point_file;
quilts_json["config_id"] = string(argv[1]);
vector<uint64_t> insert_times;
vector<uint64_t> range_query_times;
uint32_t ins_ix=0;
for(int ins_epoch =0;ins_epoch<5;ins_epoch++){
auto quilts_eval_start = std::chrono::high_resolution_clock::now();
for(int j=0;j<insert_increments;j++,ins_ix++){
quilts_obj.InsertElement(insert_queries[ins_ix]);
}
auto quilts_eval_end = std::chrono::high_resolution_clock::now();
insert_times.push_back(chrono::duration_cast<chrono::nanoseconds>(quilts_eval_end - quilts_eval_start).count()/insert_increments);
auto quilts_rq_eval_start = std::chrono::high_resolution_clock::now();
for(int i=0;i<insert_rq_size;i++){
vector<Point> range_query_result = quilts_obj.RangeQuery(queries[i]);
}
auto quilts_rq_eval_end = std::chrono::high_resolution_clock::now();
range_query_times.push_back(chrono::duration_cast<chrono::nanoseconds>(quilts_rq_eval_end - quilts_rq_eval_start).count()/insert_rq_size);
}
quilts_json["insert_query_time"]= insert_times;
quilts_json["range_query_times"]= range_query_times;
ofstream o(data_path+data_folder+"/Experiments/"+experiment_name+"/result/Insert.json",ios_base::app);
o << quilts_json << std::endl;
o.close();
}
cout<<"Insert DOne"<<endl;
}
cout<<"QUILTS DONE\n";
cout.flush();
return 0;
}
\ No newline at end of file
This diff is collapsed.