Add json results format validation

This commit is contained in:
2025-01-07 11:58:18 +01:00
parent 0318dcf8e5
commit 4901bb1f32
6 changed files with 305 additions and 2 deletions

View File

@@ -15,7 +15,7 @@ endif ()
# Global CMake variables # Global CMake variables
# ---------------------- # ----------------------
set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

View File

@@ -4,7 +4,7 @@ SHELL := /bin/bash
f_release = build_release f_release = build_release
f_debug = build_debug f_debug = build_debug
app_targets = b_best b_list b_main b_manage b_grid app_targets = b_best b_list b_main b_manage b_grid b_results
test_targets = unit_tests_platform test_targets = unit_tests_platform
define ClearTests define ClearTests

View File

@@ -67,3 +67,6 @@ add_executable(
main/Scores.cpp main/Scores.cpp
) )
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" fimdlp "${BayesNet}") target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" fimdlp "${BayesNet}")
# b_results
add_executable(b_results commands/b_results.cpp)

View File

@@ -0,0 +1,68 @@
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <regex>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
#include "common/Paths.h"
#include "results/JsonValidator.h"
#include "results/SchemaV1_0.h"
using json = nlohmann::json;
namespace fs = std::filesystem;
/// Prints `message` on std::cout inside a three-line banner.
///
/// The banner consists of a border line, a row of the form
/// `<symbol> <message, left-aligned and padded> <symbol>`, and a second border line.
///
/// @param message Text shown in the middle row.
/// @param length  Requested content width; the padded message field is `length + 7`
///                columns wide, so each border line is `length + 11` characters.
/// @param symbol  String printed at both ends of the middle row; its first character
///                is repeated to draw the border lines.
///
/// If `message` is longer than `length + 7`, the banner widens to fit it so the
/// border lines and the middle row keep the same width. A negative `length` is
/// treated as "no extra padding" instead of producing an invalid border size.
void header(const std::string& message, int length, const std::string& symbol)
{
    // Width the caller asked for, clamped at zero so it can never wrap around
    // when converted to an unsigned size.
    const std::size_t requested = length > -7 ? static_cast<std::size_t>(length + 7) : 0;
    // Never narrower than the message itself, otherwise the right edge drifts.
    const std::size_t inner = std::max(requested, message.size());
    // Four extra columns: symbol + space on the left, space + symbol on the right.
    const std::string border(inner + 4, symbol[0]);
    std::cout << border << '\n';
    std::cout << symbol << " " << std::setw(static_cast<int>(inner)) << std::left << message << " " << symbol << '\n';
    std::cout << border << std::endl;
}
int main(int argc, char* argv[])
{
std::string nameSuffix = "results_";
std::string schemaVersion = "1.0";
bool fix_it = false;
std::vector<std::string> result_files;
int max_length = 0;
// Load the result files and find the longest file name
for (const auto& entry : fs::directory_iterator(platform::Paths::results())) {
if (entry.is_regular_file() && entry.path().filename().string().starts_with(nameSuffix) && entry.path().filename().string().ends_with(".json")) {
std::string fileName = entry.path().string();
if (fileName.length() > max_length) {
max_length = fileName.length();
}
result_files.push_back(fileName);
}
}
// Process the result files
if (result_files.empty()) {
std::cerr << "Error: No result files found." << std::endl;
return 1;
}
std::string header_message = "Processing " + std::to_string(result_files.size()) + " result files.";
header(header_message, max_length, "*");
platform::JsonValidator validator(platform::SchemaV1_0::schema);
int n_errors = 0;
for (const auto& file_name : result_files) {
std::vector<std::string> errors = validator.validate(file_name);
if (!errors.empty()) {
n_errors++;
std::cout << std::setw(max_length) << std::left << file_name << ": " << errors.size() << " Errors:" << std::endl;
for (const auto& error : errors) {
std::cout << " - " << error << std::endl;
}
if (fix_it) {
validator.fix_it(file_name);
std::cout << " -> File fixed." << std::endl;
}
}
}
if (n_errors == 0) {
header("All files are valid.", max_length, "*");
} else {
std::string $verb = (fix_it) ? "had" : "have";
std::string msg = std::to_string(n_errors) + " files " + $verb + " errors.";
header(msg, max_length, "*");
}
return 0;
}

129
src/results/JsonValidator.h Normal file
View File

@@ -0,0 +1,129 @@
#ifndef JSONVALIDATOR_H
#define JSONVALIDATOR_H
#include <fstream>
#include <vector>
#include <regex>
#include <nlohmann/json.hpp>
namespace platform {
    using json = nlohmann::ordered_json;
    /// Checks JSON files against the top-level "properties" of a schema and can
    /// patch files so that they satisfy those checks.
    ///
    /// Only two constraints are evaluated for each top-level property: "type"
    /// and "const". Every property listed in the schema is treated as required.
    /// Other keywords ("pattern", "format", "required", nested "items" /
    /// "properties", ...) are not evaluated.
    class JsonValidator {
    public:
        /// @param schema Schema whose "properties" object drives validation; it is copied.
        JsonValidator(const json& schema) : schema(schema) {}
        /// Validates the JSON file `fileName`.
        ///
        /// @return One message per problem found; empty when the file passes.
        ///         Failure to open or parse the file, or a root element that is
        ///         not a JSON object, is reported as a single message.
        std::vector<std::string> validate(const std::string& fileName)
        {
            std::ifstream file(fileName);
            if (!file.is_open()) {
                return { "Error: Unable to open file." };
            }
            json data;
            try {
                file >> data;
            }
            catch (const json::parse_error& e) {
                return { "Error: JSON parsing failed: " + std::string(e.what()) };
            }
            // Fields can only be looked up in an object; say so once instead of
            // reporting every schema property as missing.
            if (!data.is_object()) {
                return { "Error: Root element should be a JSON object." };
            }
            std::vector<std::string> errors;
            // Validate all fields defined in the schema
            if (schema.contains("properties")) {
                for (const auto& [key, value] : schema["properties"].items()) {
                    validateField(key, value, data, errors);
                }
            }
            return errors;
        }
        /// Rewrites `fileName` in place so that every top-level schema property exists
        /// and every "const" property holds the schema value.
        ///
        /// A missing property receives the schema "default" if there is one, otherwise
        /// an empty array/object for those types, otherwise null. Problems are
        /// reported on std::cerr; in that case the file is left as it was, except
        /// when the final write itself fails.
        void fix_it(const std::string& fileName)
        {
            std::ifstream file(fileName);
            if (!file.is_open()) {
                std::cerr << "Error: Unable to open file for fixing." << std::endl;
                return;
            }
            json data;
            try {
                file >> data;
            }
            catch (const json::parse_error& e) {
                std::cerr << "Error: JSON parsing failed: " << e.what() << std::endl;
                return;
            }
            file.close();
            // Assigning by key throws for arrays and scalars; a null root is still
            // accepted because it turns into an object on first assignment.
            if (!data.is_object() && !data.is_null()) {
                std::cerr << "Error: Unable to fix file: root element is not a JSON object." << std::endl;
                return;
            }
            // Fix fields
            if (schema.contains("properties")) {
                for (const auto& [key, value] : schema["properties"].items()) {
                    if (!data.contains(key)) {
                        // Read "type" only when present, so the schema is never
                        // touched for properties that do not declare one.
                        std::string type;
                        if (value.contains("type") && value["type"].is_string()) {
                            type = value["type"].get<std::string>();
                        }
                        // Set default value if specified in the schema
                        if (value.contains("default")) {
                            data[key] = value["default"];
                        } else if (type == "array") {
                            data[key] = json::array();
                        } else if (type == "object") {
                            data[key] = json::object();
                        } else {
                            data[key] = nullptr;
                        }
                    }
                    // Fix const fields to match the schema value
                    if (value.contains("const")) {
                        data[key] = value["const"];
                    }
                }
            }
            // Save fixed JSON
            std::ofstream outFile(fileName);
            if (!outFile.is_open()) {
                std::cerr << "Error: Unable to open file for writing." << std::endl;
                return;
            }
            outFile << data.dump(4);
            outFile.close();
            if (!outFile) {
                std::cerr << "Error: Unable to write the fixed file." << std::endl;
            }
        }
    private:
        json schema;
        /// Checks one top-level property of `data` against its schema entry `value`
        /// and appends a message to `errors` for each violation.
        static void validateField(const std::string& field, const json& value, const json& data, std::vector<std::string>& errors)
        {
            // Check if the field is present
            if (!data.contains(field)) {
                errors.push_back("Missing required field: " + field);
                return;
            }
            const json& actual = data.at(field);
            // Check for type constraints (skipped when "type" is not a plain string)
            if (value.contains("type") && value["type"].is_string()) {
                const std::string type = value["type"].get<std::string>();
                if (type == "string" && !actual.is_string()) {
                    errors.push_back("Field '" + field + "' should be a string.");
                } else if (type == "number" && !actual.is_number()) {
                    errors.push_back("Field '" + field + "' should be a number.");
                } else if (type == "integer" && !actual.is_number_integer()) {
                    errors.push_back("Field '" + field + "' should be an integer.");
                } else if (type == "boolean" && !actual.is_boolean()) {
                    errors.push_back("Field '" + field + "' should be a boolean.");
                } else if (type == "array" && !actual.is_array()) {
                    errors.push_back("Field '" + field + "' should be an array.");
                } else if (type == "object" && !actual.is_object()) {
                    errors.push_back("Field '" + field + "' should be an object.");
                }
            }
            // Check for const constraints
            if (value.contains("const")) {
                const auto& expectedValue = value["const"];
                if (actual != expectedValue) {
                    errors.push_back("Field '" + field + "' has an invalid value. Expected: " +
                        expectedValue.dump() + ", Found: " + actual.dump());
                }
            }
        }
    };
}
#endif

103
src/results/SchemaV1_0.h Normal file
View File

@@ -0,0 +1,103 @@
#ifndef SCHEMAV1_0_H
#define SCHEMAV1_0_H
#include <nlohmann/json.hpp>
namespace platform {
    using json = nlohmann::ordered_json;
    /// Holder for version 1.0 of the results-file JSON schema.
    class SchemaV1_0 {
    public:
        // Define JSON schema.
        // Declared `inline` so this header can be included from several translation
        // units without producing multiple definitions of the member.
        inline static const json schema = {
            {"$schema", "http://json-schema.org/draft-07/schema#"},
            {"type", "object"},
            {"properties", {
                {"schema_version", {
                    {"type", "string"},
                    {"pattern", "^\\d+\\.\\d+$"},
                    {"default", "1.0"},
                    {"const", "1.0"} // Fixed schema version for this schema
                }},
                {"date", {{"type", "string"}, {"format", "date"}}},
                {"time", {{"type", "string"}, {"pattern", "^\\d{2}:\\d{2}:\\d{2}$"}}},
                {"title", {{"type", "string"}}},
                {"language", {{"type", "string"}}},
                {"language_version", {{"type", "string"}}},
                {"discretized", {{"type", "boolean"}, {"default", false}}},
                {"model", {{"type", "string"}}},
                {"platform", {{"type", "string"}}},
                {"stratified", {{"type", "boolean"}, {"default", false}}},
                {"folds", {{"type", "integer"}, {"default", 0}}},
                {"score_name", {{"type", "string"}}},
                {"version", {{"type", "string"}}},
                {"duration", {{"type", "number"}, {"default", 0}}},
                {"results", {
                    {"type", "array"},
                    {"items", {
                        {"type", "object"},
                        {"properties", {
                            {"scores_train", {{"type", "array"}, {"items", {{"type", "number"}}}}},
                            {"scores_test", {{"type", "array"}, {"items", {{"type", "number"}}}}},
                            {"times_train", {{"type", "array"}, {"items", {{"type", "number"}}}}},
                            {"times_test", {{"type", "array"}, {"items", {{"type", "number"}}}}},
                            {"notes", {{"type", "array"}, {"items", {{"type", "string"}}}}},
                            {"train_time", {{"type", "number"}, {"default", 0}}},
                            {"train_time_std", {{"type", "number"}, {"default", 0}}},
                            {"test_time", {{"type", "number"}, {"default", 0}}},
                            {"test_time_std", {{"type", "number"}, {"default", 0}}},
                            {"samples", {{"type", "integer"}, {"default", 0}}},
                            {"features", {{"type", "integer"}, {"default", 0}}},
                            {"classes", {{"type", "integer"}, {"default", 0}}},
                            {"hyperparameters", {
                                {"type", "object"},
                                {"additionalProperties", {
                                    {"oneOf", {
                                        {{"type", "number"}}, // Field can be a number
                                        {{"type", "string"}} // Field can also be a string
                                    }}
                                }}
                            }},
                            {"score", {{"type", "number"}, {"default", 0}}},
                            {"score_train", {{"type", "number"}, {"default", 0}}},
                            {"score_std", {{"type", "number"}, {"default", 0}}},
                            {"score_train_std", {{"type", "number"}, {"default", 0}}},
                            {"time", {{"type", "number"}, {"default", 0}}},
                            {"time_std", {{"type", "number"}, {"default", 0}}},
                            {"nodes", {{"type", "number"}, {"default", 0}}},
                            {"leaves", {{"type", "number"}, {"default", 0}}},
                            {"depth", {{"type", "number"}, {"default", 0}}},
                            {"dataset", {{"type", "string"}}},
                            {"confusion_matrices", {
                                {"type", "array"},
                                {"items", {
                                    {"type", "object"},
                                    {"patternProperties", {
                                        {".*", {
                                            {"type", "array"},
                                            {"items", {{"type", "integer"}}}
                                        }}
                                    }},
                                    {"additionalProperties", false}
                                }}
                            }}
                        }},
                        {"required", {
                            "scores_train", "scores_test", "times_train", "times_test",
                            "notes", "train_time", "train_time_std", "test_time", "test_time_std",
                            "samples", "features", "classes", "hyperparameters", "score", "score_train",
                            "score_std", "score_train_std", "time", "time_std", "nodes", "leaves",
                            "depth", "dataset", "confusion_matrices"
                        }}
                    }}
                }}
            }},
            {"required", {
                "schema_version", "date", "time", "title", "language", "language_version",
                "discretized", "model", "platform", "stratified", "folds", "score_name",
                "version", "duration", "results"
            }}
        };
    };
}
#endif