Commit f69291a9 authored by Witold Dyrka's avatar Witold Dyrka

version 3.1.0

parent 41f05f09
This diff is collapsed.
clist=./example/hets.cmap
cmode=at_least
fasta=./example/hets.fasta
lex=./example/hets.lex.wcfg
struct=./example/hets.struct.wcfg
......
clist=./example/hets.cmap
cmode=at_least
fasta=./example/hets.fasta
lex=./example/hets.lex.wcfg
struct=./example/hets.struct.wcfg
......
......@@ -3,6 +3,7 @@
#ifndef PGE_COMMON_OPTIONS_H_
#define PGE_COMMON_OPTIONS_H_
#include <omp.h>
#include <boost/program_options.hpp>
#include <memory>
......@@ -37,8 +38,12 @@ inline std::string AsString(const Container &vals) {
/// Base class for parsing, validation and storage of options.
class CommonOptions {
public:
/// True iff --help flag was given.
bool help;
/// Select mode for parsing with contact constraints
std::set<std::string> cmode_values{"at_least", "at_most", "exactly"};
std::string cmode;
/// Switch between Viterbi and Baum-Welch parsing.
bool viterbi;
/// Output directory.
......@@ -96,6 +101,7 @@ class CommonOptions {
virtual void PrintOptions(std::ostream &outstream) const {
if (!contacts.empty()) {
outstream << "clist=" << contacts << '\n';
outstream << "cmode=" << cmode << '\n';
}
if (!conf.empty()) {
outstream << "conf=" << conf << '\n';
......@@ -117,6 +123,8 @@ class CommonOptions {
using boost::program_options::value;
RegisterOption("clist", value(&contacts),
"list of contact pairs (file, optional)");
RegisterOption("cmode", value(&cmode)->default_value("at_least"),
("contact constraints mode " + AsString(cmode_values)).c_str());
RegisterOption("conf", value(&conf),
"program configuration (file, optional)");
RegisterOption("fasta", value(&sequences),
......@@ -128,8 +136,11 @@ class CommonOptions {
"output directory");
RegisterOption("struct", value(&structural_part),
"structural part of grammar (file)");
RegisterOption("threads", value(&threads)->default_value(INT_MAX),
"number of worker threads in use");
int num_threads;
#pragma omp parallel
{ num_threads = omp_get_num_threads(); }
RegisterOption("threads", value(&threads)->default_value(num_threads),
"number of worker threads in use (default: all)");
RegisterOption("viterbi", bool_switch(&viterbi),
"switch from Baum-Welch (sum) to Viterbi (max) parsing");
}
......@@ -145,7 +156,7 @@ class CommonOptions {
/// Implementation of Validate().
///
/// Correct override should call base method first and fail if base does.
virtual bool ValidateOptions() { return true; }
virtual bool ValidateOptions() {
  // The only check at this level: cmode must be one of cmode_values.
  return cmode_values.find(cmode) != cmode_values.end();
}
private:
/// Details about parsing and usage of options.
......
......@@ -40,6 +40,31 @@ BipartiteGrammar<char> EstimateNull0() {
return {null_mapping, null_rs};
}
/// Null hypothesis with equal emission probabilities.
///
/// Collects the distinct symbols found at rhs[0] of the rules in `lexical`
/// and adds one lexical rule per symbol, all with lhs 0 and the same
/// probability (1 / number of distinct symbols). The structural part consists
/// of the single rule {1, {0, 1}, 1.0} with start symbol 1.
BipartiteGrammar<char> EstimateNullEqual(Grammar<char> lexical) {
  // Rebuilt on every call. A function-local static here would keep the
  // symbols of an earlier call while `counter` restarts at 0, so `b` would be
  // empty and every b(index) lookup below would be out of range.
  std::unordered_map<char, int> ownAlphabet;
  int counter = 0;
  for (const Rule<char> &rule : lexical.rules) {
    // emplace() leaves the map untouched when the symbol is already present,
    // so the index only advances for symbols seen for the first time.
    if (ownAlphabet.emplace(rule.rhs[0], counter).second) ++counter;
  }

  Eigen::VectorXf b = Eigen::VectorXf::Constant(counter, 1.0 / counter);

  Grammar<int> null_rs;
  null_rs.add({1, {0, 1}, 1.0});
  null_rs.start = 1;

  Grammar<char> null_mapping;
  // The sum does not change inside the loop; compute it once.
  const float total = b.sum();
  for (const auto &entry : ownAlphabet) {
    null_mapping.add({0, {entry.first}, b(entry.second) / total});
  }
  null_mapping.start = -1;

  return {null_mapping, null_rs};
}
BipartiteGrammar<char> EstimateNull1(Grammar<char> lexical) {
Eigen::VectorXf b =
Eigen::VectorXf::Zero(static_cast<int64_t>(alphabet.size() + 1));
......
......@@ -11,4 +11,6 @@ BipartiteGrammar<char> EstimateNull0();
/// Null hypothesis no 1 - based on lexical grammar.
BipartiteGrammar<char> EstimateNull1(Grammar<char> lexical);
BipartiteGrammar<char> EstimateNullEqual(Grammar<char> lexical);
#endif // PGE_NULL_H_
This diff is collapsed.
......@@ -45,14 +45,22 @@ struct CkyParser {
void ParseWindows(const PreParser &pp, int width, bool viterbi = false);
/// Parse sequence and return probability of derivation from start symbol.
double ParseSample(const PreParser &pp, bool viterbi = false);
double ParseSample(const PreParser &pp, const std::string &cmode, bool viterbi = false);
/// Parse necessarily using contact rule for contact pairs in map.
/// Call after PreParser::PreParseWithContacts().
void Parse(const PreParser &pp, int begin, int length, bool viterbi = false);
void Parse(const PreParser &pp, int begin, int length, const std::string &cmode, bool viterbi = false);
void ParseAtLeast(const PreParser &pp, int begin, int length, bool viterbi = false);
void ParseAtMost (const PreParser &pp, int begin, int length, bool viterbi = false);
void ParseExactly(const PreParser &pp, int begin, int length, bool viterbi = false);
/// Parse sequence and build jungle of parse trees.
void ParseTree(const PreParser &pp, int begin, int length);
void ParseTree(const PreParser &pp, int begin, int length, const std::string &cmode);
void ParseAtLeastTree(const PreParser &pp, int begin, int length);
void ParseAtMostTree (const PreParser &pp, int begin, int length);
void ParseExactlyTree(const PreParser &pp, int begin, int length);
/// Parse fragments of given width and build jungle of parse trees.
void ParseTreeWindows(const PreParser &pp, int width);
......
......@@ -61,7 +61,7 @@ class PcfgEvolveOptions : public CommonOptions {
"galib configuration (file, optional)");
RegisterOption("grammar-flush-frequency",
value(&grammar_flush_frequency)->default_value(0),
"grammar flush frequency (optional)");
"grammar flush frequency (default: final only)");
RegisterOption("jobid", value(&jobid), "job identifier");
RegisterOption(
"obj", value(&objective_function)->default_value("G_MX"),
......@@ -69,7 +69,7 @@ class PcfgEvolveOptions : public CommonOptions {
RegisterOption("preserve-lex", bool_switch(&preserve_lex),
"preserve lexical part of grammar");
RegisterOption("seed", value(&seed)->default_value(0),
"seed (integer, optional)");
"random seed (integer, optional)");
RegisterOption("sharing-cutoff",
value(&sharing_cutoff)->default_value(1.0f),
"sharing cutoff");
......@@ -102,7 +102,7 @@ class PcfgEvolve {
static float GalibObjective(GAGenome &g); // NOLINT
static PreParserSet PreparseSampleWithContacts(
const SequenceSet<char> &sample, const Grammar<char> &lexical,
const ContactPairSet &contacts);
const ContactPairSet &contacts, const std::string &cmode);
static PreParserSet PreparseSampleWithoutContacts(
const SequenceSet<char> &sample, const Grammar<char> &lexical);
......@@ -168,11 +168,11 @@ void PcfgEvolve::Run() {
for (Rule<char> &rule : lexical_one.rules) rule.prob = 1.0;
Pxm_g_preparse_set =
PreparseSampleWithContacts(sample_set, lexical, contact_set);
PreparseSampleWithContacts(sample_set, lexical, contact_set, opts.cmode);
Px_g_preparse_set = PreparseSampleWithoutContacts(sample_set, lexical);
Pm_g_preparse_set =
PreparseSampleWithContacts(sample_set, lexical_one, contact_set);
PreparseSampleWithContacts(sample_set, lexical_one, contact_set, opts.cmode);
grammar = LoadWcfg<int>(opts.structural_part);
......@@ -327,12 +327,12 @@ float PcfgEvolve::ComputeScore(const FloatVector &vector) const {
PreParserSet new_pps, new_pps_zero;
if (!opts.preserve_lex) {
new_pps = PreparseSampleWithContacts(sample_set, norm_lexical, contact_set);
new_pps = PreparseSampleWithContacts(sample_set, norm_lexical, contact_set, opts.cmode);
new_pps_zero = PreparseSampleWithoutContacts(sample_set, norm_lexical);
}
const PreParserSet &norm_pps =
opts.preserve_lex ? Pxm_g_preparse_set : new_pps_zero;
opts.preserve_lex ? Pxm_g_preparse_set : new_pps;
const PreParserSet &norm_pps_zero =
opts.preserve_lex ? Px_g_preparse_set : new_pps_zero;
int pps_size = std::max(
......@@ -367,23 +367,24 @@ double PcfgEvolve::ScoreAt(CkyParser *parser, const PreParserSet &norm_pps_zero,
if (opts.objective_function == "G_X" || opts.objective_function == "X_G" ||
opts.objective_function == "M_XG" || opts.objective_function == "Z") {
Px_g = parser->ParseSample(norm_pps_zero[idx], opts.viterbi);
Px_g = parser->ParseSample(norm_pps_zero[idx], opts.cmode, opts.viterbi);
if (Px_g <= 0.0) return_bottom_score = true;
}
if (opts.objective_function == "G_MX" || opts.objective_function == "X_MG" ||
opts.objective_function == "M_XG" || opts.objective_function == "Z") {
Pxm_g = parser->ParseSample(norm_pps[idx], opts.viterbi);
opts.objective_function == "M_XG" || opts.objective_function == "XM_G" ||
opts.objective_function == "Z") {
Pxm_g = parser->ParseSample(norm_pps[idx], opts.cmode, opts.viterbi);
if (Pxm_g <= 0.0) return_bottom_score = true;
}
if (opts.objective_function == "G_MX" || opts.objective_function == "G_M" ||
opts.objective_function == "M_G" || opts.objective_function == "X_MG" ||
opts.objective_function == "Z") {
Pm_g = parser->ParseSample(Pm_g_preparse_set[idx], opts.viterbi);
if (opts.objective_function == "G_M" || opts.objective_function == "M_G" ||
opts.objective_function == "X_MG" || opts.objective_function == "Z") {
Pm_g = parser->ParseSample(Pm_g_preparse_set[idx], opts.cmode, opts.viterbi);
if (Pm_g <= 0.0) return_bottom_score = true;
}
if (return_bottom_score) {
return bottom_score;
} else if (opts.objective_function == "G_X" ||
......@@ -394,7 +395,8 @@ double PcfgEvolve::ScoreAt(CkyParser *parser, const PreParserSet &norm_pps_zero,
opts.objective_function == "M_G") {
// P(G|M) = P(M|G)*P(G) / P(M).
return log10(Pm_g);
} else if (opts.objective_function == "G_MX") {
} else if (opts.objective_function == "G_MX" ||
opts.objective_function == "XM_G") {
// P(G|MX) = P(XM|G)*P(G) / P(MX).
// Assumption: P(m(i)|x(i)) is equal for any i.
return log10(Pxm_g);
......@@ -414,12 +416,12 @@ double PcfgEvolve::ScoreAt(CkyParser *parser, const PreParserSet &norm_pps_zero,
PreParserSet PcfgEvolve::PreparseSampleWithContacts(
const SequenceSet<char> &sample, const Grammar<char> &lexical,
const ContactPairSet &contacts) {
const ContactPairSet &contacts, const std::string &cmode) {
PreParserSet pps;
pps.reserve(sample.size());
for (const Sequence<char> &seq : sample) {
if (contacts.count(seq.id)) {
pps.push_back(PreparseWithContacts(seq, lexical, contacts.at(seq.id)));
pps.push_back(PreparseWithContacts(seq, lexical, contacts.at(seq.id), cmode));
} else {
pps.push_back(Preparse(seq, lexical));
}
......
......@@ -26,7 +26,7 @@ class PcfgScanOptions : public CommonOptions {
std::set<std::string> objective_function_values{"X_G", "M_G", "XM_G", "X_MG",
"M_XG"};
std::string objective_function;
std::set<std::string> null_model_values{"p0", "p1"};
std::set<std::string> null_model_values{"p0", "p1", "eq"};
std::string null_model;
int winmax;
int winmin;
......@@ -59,11 +59,11 @@ class PcfgScanOptions : public CommonOptions {
("objective function " + AsString(objective_function_values)).c_str());
RegisterOption(
"null", value(&null_model),
("null model " + AsString(null_model_values) + " (optional)").c_str());
RegisterOption("winmax", value(&winmax)->default_value(INT_MAX),
"max. scanning window size (optional)");
RegisterOption("winmin", value(&winmin)->default_value(INT_MAX),
"min. scanning window size (optional)");
("null model " + AsString(null_model_values) + " (default: none)").c_str());
RegisterOption("winmax", value(&winmax)->default_value(0),
"max. scanning window size (default: full length)");
RegisterOption("winmin", value(&winmin)->default_value(0),
"min. scanning window size (default: full length)");
RegisterOption("out", value(&output)->default_value(""),
"scan scores (output file)");
}
......@@ -111,19 +111,25 @@ class PcfgScan {
null_grammar = EstimateNull0();
else if (opts.null_model == "p1")
null_grammar = EstimateNull1(lexical);
else if (opts.null_model == "eq")
null_grammar = EstimateNullEqual(lexical);
else if (!opts.null_model.empty())
throw std::runtime_error("Unknown null model");
int sample_max_length = 0;
int sample_min_length = INT_MAX;
for (const auto &seq : sample_set) {
int seq_length = seq.length();
if (seq_length > sample_max_length) {
sample_max_length = seq_length;
}
if (seq_length < sample_min_length) {
sample_min_length = seq_length;
}
}
const int winmax = std::min(opts.winmax, sample_max_length);
const int winmin = std::max(std::min(opts.winmin, winmax), 2);
const int winmax = std::max(2, opts.winmax>0 ? std::min(opts.winmax, sample_max_length) : sample_max_length);
const int winmin = std::max(2, opts.winmin>0 ? opts.winmin : std::min(winmax, sample_min_length));
// Create CNF for each window which is equivalent only for a given window.
std::unordered_map<int, Grammar<int>> null_rules;
......@@ -195,8 +201,8 @@ class PcfgScan {
size_t i = static_cast<size_t>(index);
worker.seq = &sample_set[i];
worker.seq_length = worker.seq->length();
worker.winmax_i = std::min(opts.winmax, worker.seq_length);
worker.winmin_i = std::max(std::min(opts.winmin, worker.winmax_i), 2);
worker.winmax_i = std::max(2, opts.winmax>0 ? std::min(opts.winmax, worker.seq_length) : worker.seq_length);
worker.winmin_i = std::max(2, opts.winmin>0 ? opts.winmin : (opts.winmax>0 ? 2 : worker.seq_length));
worker.w_limit = worker.seq_length - worker.winmin_i + 1;
worker.Score(opts.objective_function, &parser, &scores, &trees);
worker.NullScore(opts.objective_function, &null_parser, &scores);
......@@ -284,7 +290,7 @@ class PcfgScan {
if (use_map_null_score) {
null_score = map_null_score.at(v);
} else {
null_parser->at(v).Parse(null_pp_zero, w, v, opts.viterbi);
null_parser->at(v).Parse(null_pp_zero, w, v, opts.cmode, opts.viterbi);
null_score = log10(null_parser->at(v).chart.val(
v - 1, 0, null_rules.at(v).start));
}
......@@ -319,7 +325,7 @@ class PcfgScan {
FormattedOutput(stream, w, v_best, score_best, trees);
}
}
if (opts.group_by == "seq") {
if (opts.group_by == "seq" && w_limit > 0) {
FormattedOutput(stream, w_best, v_best, score_best, trees);
}
}
......@@ -332,7 +338,7 @@ class PcfgScan {
const Grammar<char> &lex = objective == "M_G" ? lexical_one : lexical;
if ((objective == "M_G" || objective == "XM_G") &&
contact_set.count(seq->id)) {
pp = PreparseWithContacts(*seq, lex, contact_set.at(seq->id));
pp = PreparseWithContacts(*seq, lex, contact_set.at(seq->id), opts.cmode);
} else {
pp = Preparse(*seq, lex);
}
......@@ -348,9 +354,9 @@ class PcfgScan {
int winmax_w = std::min(winmax_i, seq_length - w);
if (objective == "M_G" || objective == "XM_G") {
if (trees) {
parser->ParseTree(pp, w, winmax_w);
parser->ParseTree(pp, w, winmax_w, opts.cmode);
} else {
parser->Parse(pp, w, winmax_w, opts.viterbi);
parser->Parse(pp, w, winmax_w, opts.cmode, opts.viterbi);
}
}
for (int v = winmin_i; v <= winmax_w; ++v) {
......@@ -377,7 +383,7 @@ class PcfgScan {
stream << '\t' << trees.val(v - 1, w, 0);
}
stream << '\n';
}
};
};
};
......
......@@ -9,21 +9,35 @@
/// Pre-parse `sequence` with contact information, choosing the variant by
/// contact mode: "at_most" selects PreparseWithContactsAtMost(), every other
/// value of `cmode` selects PreparseWithContactsAtLeast().
template <class T>
PreParser PreparseWithContacts(const Sequence<T> &sequence,
                               const Grammar<T> &lexical,
                               const ContactPairList &contacts,
                               const std::string &cmode) {
  const bool use_at_most = (cmode == "at_most");
  return use_at_most
             ? PreparseWithContactsAtMost(sequence, lexical, contacts)
             : PreparseWithContactsAtLeast(sequence, lexical, contacts);
}
template <class T>
PreParser PreparseWithContactsAtLeast(const Sequence<T> &sequence,
const Grammar<T> &lexical,
const ContactPairList &contacts) {
Chart3d<double> chart =
Chart3d<double>::Create(1, sequence.length(), lexical.max_lhs() + 1);
Chart3d<double> chart_pairs =
Chart3d<double>::Create(1, sequence.length(), lexical.max_lhs() + 1);
std::unordered_map<int, float> paired;
for (const ContactPair &cp : contacts) {
paired[cp.left] = cp.confidence;
paired[cp.right] = cp.confidence;
}
std::unordered_multimap<int, int> pair_at_pos;
for (const ContactPair &cp : contacts)
if (cp.confidence > 0) {
pair_at_pos.insert(std::make_pair(cp.left, cp.right - cp.left));
pair_at_pos.insert(std::make_pair(cp.right, cp.right - cp.left));
}
for (int i = 0; i < sequence.length(); ++i) {
for (const Rule<T> &rule : lexical.rules) {
if (rule.rhs[0] == sequence[i]) {
if (paired.count(i) && paired[i] > 0.0) {
if (pair_at_pos.count(i)) {
chart_pairs.val(0, i, rule.lhs) = rule.prob;
} else {
chart.val(0, i, rule.lhs) = rule.prob;
......@@ -31,16 +45,43 @@ PreParser PreparseWithContacts(const Sequence<T> &sequence,
}
}
}
std::unique_ptr<int[]> pair_at_pos =
std::make_unique<int[]>(sequence.data.size());
for (const auto &cp : contacts) {
pair_at_pos.get()[cp.left] = cp.right - cp.left;
pair_at_pos.get()[cp.right] = cp.right - cp.left;
return {std::move(chart), std::move(chart_pairs), contacts,
std::move(pair_at_pos)};
}
/// Pre-parse `sequence` for the "at most" contact variant.
///
/// Every lexical rule whose rhs[0] equals the symbol at a position stores its
/// probability in `chart` for that position. Positions that are an end point
/// of a contact pair with confidence > 0 additionally get the same
/// probability in `chart_pairs`. `pair_at_pos` maps each such end point to
/// the distance (right - left) of its pair; a position may occur in several
/// pairs.
template <class T>
PreParser PreparseWithContactsAtMost(const Sequence<T> &sequence,
                                     const Grammar<T> &lexical,
                                     const ContactPairList &contacts) {
  Chart3d<double> chart =
      Chart3d<double>::Create(1, sequence.length(), lexical.max_lhs() + 1);
  Chart3d<double> chart_pairs =
      Chart3d<double>::Create(1, sequence.length(), lexical.max_lhs() + 1);

  // Index both end points of every contact that has positive confidence.
  std::unordered_multimap<int, int> pair_at_pos;
  for (const ContactPair &contact : contacts) {
    if (!(contact.confidence > 0)) continue;
    const auto distance = contact.right - contact.left;
    pair_at_pos.insert(std::make_pair(contact.left, distance));
    pair_at_pos.insert(std::make_pair(contact.right, distance));
  }

  for (int pos = 0; pos < sequence.length(); ++pos) {
    // Whether this position is an end point does not depend on the rule.
    const bool in_contact = pair_at_pos.find(pos) != pair_at_pos.end();
    for (const Rule<T> &rule : lexical.rules) {
      if (!(rule.rhs[0] == sequence[pos])) continue;
      if (in_contact) {
        chart_pairs.val(0, pos, rule.lhs) = rule.prob;
      }
      // Unlike the paired chart, the plain chart is filled for all positions.
      chart.val(0, pos, rule.lhs) = rule.prob;
    }
  }

  return {std::move(chart), std::move(chart_pairs), contacts,
          std::move(pair_at_pos)};
}
template <class T>
PreParser Preparse(const Sequence<T> &sequence, const Grammar<T> &lexical) {
Chart3d<double> chart =
......@@ -52,8 +93,7 @@ PreParser Preparse(const Sequence<T> &sequence, const Grammar<T> &lexical) {
}
}
}
std::unique_ptr<int[]> pair_at_pos =
std::make_unique<int[]>(sequence.data.size());
std::unordered_multimap<int, int> pair_at_pos;
return {std::move(chart), {}, {}, std::move(pair_at_pos)};
}
......@@ -61,5 +101,12 @@ PreParser Preparse(const Sequence<T> &sequence, const Grammar<T> &lexical) {
template PreParser Preparse<char>(const Sequence<char> &,
const Grammar<char> &);
template PreParser PreparseWithContacts<char>(const Sequence<char> &,
const Grammar<char> &,
const ContactPairList &,
const std::string &);
template PreParser PreparseWithContactsAtLeast<char>(const Sequence<char> &,
const Grammar<char> &,
const ContactPairList &);
template PreParser PreparseWithContactsAtMost<char>(const Sequence<char> &,
const Grammar<char> &,
const ContactPairList &);
......@@ -4,7 +4,9 @@
#define PGE_PREPARSER_H_
#include <memory>
#include <unordered_map>
#include <vector>
#include <string>
#include "pge/chart3d.h"
#include "pge/grammar.h"
......@@ -22,7 +24,7 @@ struct PreParser {
/// (lexical). Value: rule probability.
Chart3d<double> chart_pairs;
ContactPairList contacts;
std::unique_ptr<int[]> pair_at_pos;
std::unordered_multimap<int, int> pair_at_pos;
/// Length of sequence.
int length() const { return chart.dim_y; }
......@@ -33,13 +35,22 @@ struct PreParser {
/// Parse using lexical grammar and contact pairs.
template <class T>
PreParser PreparseWithContacts(const Sequence<T> &sequence,
const Grammar<T> &lexical,
const ContactPairList &contacts);
PreParser PreparseWithContacts(const Sequence<T> &,
const Grammar<T> &,
const ContactPairList &,
const std::string &);
template <class T>
PreParser PreparseWithContactsAtLeast(const Sequence<T> &,
const Grammar<T> &,
const ContactPairList &);
template <class T>
PreParser PreparseWithContactsAtMost(const Sequence<T> &,
const Grammar<T> &,
const ContactPairList &);
/// Parse using lexical grammar.
template <class T>
PreParser Preparse(const Sequence<T> &sequence, const Grammar<T> &lexical);
PreParser Preparse(const Sequence<T> &, const Grammar<T> &);
/// Collection of preparsers.
using PreParserSet = std::vector<PreParser>;
......
......@@ -5,10 +5,14 @@ cd tests
passed=0
all=0
test_cases+="evolve/brackets/gerep.sh "
test_cases+="evolve/csl/csl.at_most_evolve.sh "
test_cases+="scan/brackets/max.sh "
test_cases+="scan/brackets/sum.sh "
test_cases+="scan/brackets/tree.sh "
test_cases+="scan/cparse/cparse.at_least.sh "
test_cases+="scan/cparse/cparse.exactly.sh "
test_cases+="scan/cparse/cparse.at_most.sh "
test_cases+="scan/csl/csl.at_most.sh "
test_cases+="scan/hets/hets-null.sh "
test_cases+="scan/hets/hets.sh "
test_cases+="scan/octal/run.sh "
......
grammar_type WCFG
start_symbol -1
0 1 a
1 1 b
2 1 c
grammar_type WCFG
start_symbol 7
3 0.677133 1 3 2
3 0.322867 1 2
4 0.762384 0 4 1
4 0.237616 0 1
5 0.333572 4 2
5 0.666428 5 2
6 0.323099 0 3
6 0.676901 0 6
7 0.00598852 4 2
7 0.435122 5 2
7 0.122476 0 3
7 0.436414 0 6
grammar_type WCFG
start_symbol -1
0 1 a
1 1 b
2 1 c
grammar_type WCFG
start_symbol 7
3 0.666823 1 3 2
3 0.333177 1 2
4 0.759733 0 4 1
4 0.240268 0 1
5 0.332503 4 2
5 0.667497 5 2
6 0.330536 0 3
6 0.669464 0 6
7 0.0083888 4 2
7 0.479577 5 2
7 0.153684 0 3
7 0.35835 0 6
seed 849059420
score_filename ./2-4551451649865653852-galib.csv
replacement_number 1
minimaxi -1
number_of_generations 100
convergence_percentage 1.001
generations_to_convergence 10
crossover_probability 0.9
mutation_probability 0.001
population_size 100
replacement_percentage 0.5
number_of_best 1
score_frequency 1
flush_frequency 100
select_scores 31
record_diversity 1
0 2.46889 5.47274 1.88141 0.523526 0.338716
1 2.1347 2.4089 1.88141 0.146443 0.292784
2 2.03251 2.1906 1.86157 0.0802651 0.26815
3 1.97524 2.0741 1.86157 0.0574778 0.25756
4 1.94612 2.01647 1.86157 0.0418824 0.25845
5 1.92119 1.97647 1.85015 0.0306747 0.253837
6 1.9056 1.94293 1.82944 0.023661 0.24801
7 1.89587 1.93786 1.81441 0.0246111 0.240934
8 1.883 1.92223 1.81441 0.023511 0.238513
9 1.87016 1.9015 1.81114 0.0232408 0.223285
10 1.85676 1.88713 1.81114 0.0191306 0.186674
11 1.84795 1.87691 1.80124 0.0169752 0.178499
12 1.83853 1.85824 1.80124 0.0137164 0.168613
13 1.83092 1.85044 1.80124 0.0131436 0.16544
14 1.82563 1.84359 1.80004 0.0116886 0.163412
15 1.82105 1.83797 1.80004 0.00936469 0.154381
16 1.8156 1.82944 1.79836 0.00806624 0.150501
17 1.81196 1.82944 1.79475 0.00697962 0.150166
18 1.80931 1.82151 1.79475 0.00577593 0.146691
19 1.8072 1.81664 1.79475 0.00506425 0.144215
20 1.80537 1.81441 1.79475 0.00469431 0.138926
21 1.80331 1.81114 1.79147 0.00420008 0.124544
22 1.80134 1.8084 1.79147 0.00325612 0.11137
23 1.80034 1.8084 1.79147 0.00313044 0.112915
24 1.79904 1.8061 1.79147 0.002591 0.108324
25 1.79818 1.80443 1.79147 0.00265311 0.106947
26 1.79739 1.80443 1.79147 0.00261711 0.103626
27 1.7968 1.80443 1.79147 0.00268588 0.100721
28 1.7954 1.80443 1.79077 0.00249761 0.0893738
29 1.79466 1.80443 1.79052 0.00239215 0.0863516
30 1.7939 1.80443 1.79021 0.00223927 0.0820646
clist=seq_pos.fasta.cmap
cmode=at_most
fasta=seq_pos.fasta
lex=lex.wcfg
outdir=.
struct=struct.wcfg
threads=1
viterbi=0
galib-conf=2-4551451649865653852-galib.conf
grammar-flush-frequency=20
jobid=2
obj=G_MX
preserve-lex=0
seed=4551451649865653852
sharing-cutoff=20
# sample settings for GAlib applications
# GAlib expects parameters in name-value pairs. The name should be a single
# string (no whitespace allowed). The value should be of a type appropriate
# for the named parameter. Anything after a # character will be ignored.
# The file must end with a blank line. If you specify parameters that depend
# on other parameters, the last parameter will override the first or act on
# data modified by the first (the parameters are applied in the order they
# are listed).
minimaxi -1
number_of_generations 100
convergence_percentage 1.001
generations_to_convergence 10
crossover_probability 0.90
mutation_probability 0.001
population_size 100
replacement_percentage 0.50
number_of_best 1
score_frequency 1
flush_frequency 100
select_scores 31
record_diversity 1
#!/bin/bash
# Authors: Witold W. Dyrka
fasta="seq_pos.fasta"
clist="$fasta.cmap"
cmode="at_most"
gaconf="csl.at_most.conf"
lex="lex.wcfg"
struct="struct.wcfg"
obj="G_MX"
seed=4551451649865653852
code=2
indiv=0
gram_flush_freq=20
sharing_cutoff=20
stats="$code-$seed-galib.csv"
gaconf="$code-$seed-galib.conf"
grconf="$code-$seed-pcfg_evolve.conf"
structI="$code-$seed-$gram_flush_freq-$struct"
structF="$code-$seed-final-$indiv-$struct"
lexI="$code-$seed-$gram_flush_freq-$indiv-$lex"
lexF="$code-$seed-final-$indiv-$lex"
log="log.txt"
err="err.txt"
pcfg_evolve --jobid $code --seed $seed --fasta $fasta --clist $clist --cmode $cmode --galib-conf $gaconf --lex $lex --struct $struct --obj $obj --grammar-flush-frequency $gram_flush_freq --sharing-cutoff $sharing_cutoff --threads 1 1>$log 2>$err
test $? -eq 0 && test -z "$(git diff -- $0)" && test -z "$(git diff -- $stats)" && test -z "$(git diff -- $gaconf)" && test -z "$(git diff -- $grconf)" &&
test -z "$(git diff -- $structI)" &&<