Enhance tests coverage and report output

This commit is contained in:
2024-04-30 14:00:24 +02:00
parent b4a222b100
commit 3c7382a93a
947 changed files with 376596 additions and 3921 deletions

View File

@@ -31,17 +31,17 @@
<td class="headerValue">coverage.info</td>
<td></td>
<td class="headerItem">Lines:</td>
<td class="headerCovTableEntryHi">92.5&nbsp;%</td>
<td class="headerCovTableEntry">120</td>
<td class="headerCovTableEntryHi">97.4&nbsp;%</td>
<td class="headerCovTableEntry">114</td>
<td class="headerCovTableEntry">111</td>
</tr>
<tr>
<td class="headerItem">Test Date:</td>
<td class="headerValue">2024-04-29 20:48:03</td>
<td class="headerValue">2024-04-30 13:59:18</td>
<td></td>
<td class="headerItem">Functions:</td>
<td class="headerCovTableEntryHi">91.7&nbsp;%</td>
<td class="headerCovTableEntry">12</td>
<td class="headerCovTableEntryHi">100.0&nbsp;%</td>
<td class="headerCovTableEntry">11</td>
<td class="headerCovTableEntry">11</td>
</tr>
<tr><td><img src="../../glass.png" width=3 height=3 alt=""></td></tr>
@@ -70,165 +70,157 @@
<span id="L8"><span class="lineNum"> 8</span> : #include &quot;BayesMetrics.h&quot;</span>
<span id="L9"><span class="lineNum"> 9</span> : namespace bayesnet {</span>
<span id="L10"><span class="lineNum"> 10</span> : //samples is n+1xm tensor used to fit the model</span>
<span id="L11"><span class="lineNum"> 11</span> <span class="tlaGNC tlaBgGNC"> 3957 : Metrics::Metrics(const torch::Tensor&amp; samples, const std::vector&lt;std::string&gt;&amp; features, const std::string&amp; className, const int classNumStates)</span></span>
<span id="L12"><span class="lineNum"> 12</span> <span class="tlaGNC"> 3957 : : samples(samples)</span></span>
<span id="L13"><span class="lineNum"> 13</span> <span class="tlaGNC"> 3957 : , features(features)</span></span>
<span id="L14"><span class="lineNum"> 14</span> <span class="tlaGNC"> 3957 : , className(className)</span></span>
<span id="L15"><span class="lineNum"> 15</span> <span class="tlaGNC"> 3957 : , classNumStates(classNumStates)</span></span>
<span id="L11"><span class="lineNum"> 11</span> <span class="tlaGNC tlaBgGNC"> 2248 : Metrics::Metrics(const torch::Tensor&amp; samples, const std::vector&lt;std::string&gt;&amp; features, const std::string&amp; className, const int classNumStates)</span></span>
<span id="L12"><span class="lineNum"> 12</span> <span class="tlaGNC"> 2248 : : samples(samples)</span></span>
<span id="L13"><span class="lineNum"> 13</span> <span class="tlaGNC"> 2248 : , className(className)</span></span>
<span id="L14"><span class="lineNum"> 14</span> <span class="tlaGNC"> 2248 : , features(features)</span></span>
<span id="L15"><span class="lineNum"> 15</span> <span class="tlaGNC"> 2248 : , classNumStates(classNumStates)</span></span>
<span id="L16"><span class="lineNum"> 16</span> : {</span>
<span id="L17"><span class="lineNum"> 17</span> <span class="tlaGNC"> 3957 : }</span></span>
<span id="L17"><span class="lineNum"> 17</span> <span class="tlaGNC"> 2248 : }</span></span>
<span id="L18"><span class="lineNum"> 18</span> : //samples is n+1xm std::vector used to fit the model</span>
<span id="L19"><span class="lineNum"> 19</span> <span class="tlaGNC"> 176 : Metrics::Metrics(const std::vector&lt;std::vector&lt;int&gt;&gt;&amp; vsamples, const std::vector&lt;int&gt;&amp; labels, const std::vector&lt;std::string&gt;&amp; features, const std::string&amp; className, const int classNumStates)</span></span>
<span id="L20"><span class="lineNum"> 20</span> <span class="tlaGNC"> 176 : : features(features)</span></span>
<span id="L21"><span class="lineNum"> 21</span> <span class="tlaGNC"> 176 : , className(className)</span></span>
<span id="L22"><span class="lineNum"> 22</span> <span class="tlaGNC"> 176 : , classNumStates(classNumStates)</span></span>
<span id="L23"><span class="lineNum"> 23</span> <span class="tlaGNC"> 352 : , samples(torch::zeros({ static_cast&lt;int&gt;(vsamples.size() + 1), static_cast&lt;int&gt;(vsamples[0].size()) }, torch::kInt32))</span></span>
<span id="L19"><span class="lineNum"> 19</span> <span class="tlaGNC"> 96 : Metrics::Metrics(const std::vector&lt;std::vector&lt;int&gt;&gt;&amp; vsamples, const std::vector&lt;int&gt;&amp; labels, const std::vector&lt;std::string&gt;&amp; features, const std::string&amp; className, const int classNumStates)</span></span>
<span id="L20"><span class="lineNum"> 20</span> <span class="tlaGNC"> 96 : : samples(torch::zeros({ static_cast&lt;int&gt;(vsamples.size() + 1), static_cast&lt;int&gt;(vsamples[0].size()) }, torch::kInt32))</span></span>
<span id="L21"><span class="lineNum"> 21</span> <span class="tlaGNC"> 96 : , className(className)</span></span>
<span id="L22"><span class="lineNum"> 22</span> <span class="tlaGNC"> 96 : , features(features)</span></span>
<span id="L23"><span class="lineNum"> 23</span> <span class="tlaGNC"> 96 : , classNumStates(classNumStates)</span></span>
<span id="L24"><span class="lineNum"> 24</span> : {</span>
<span id="L25"><span class="lineNum"> 25</span> <span class="tlaGNC"> 1408 : for (int i = 0; i &lt; vsamples.size(); ++i) {</span></span>
<span id="L26"><span class="lineNum"> 26</span> <span class="tlaGNC"> 4928 : samples.index_put_({ i, &quot;...&quot; }, torch::tensor(vsamples[i], torch::kInt32));</span></span>
<span id="L25"><span class="lineNum"> 25</span> <span class="tlaGNC"> 768 : for (int i = 0; i &lt; vsamples.size(); ++i) {</span></span>
<span id="L26"><span class="lineNum"> 26</span> <span class="tlaGNC"> 2688 : samples.index_put_({ i, &quot;...&quot; }, torch::tensor(vsamples[i], torch::kInt32));</span></span>
<span id="L27"><span class="lineNum"> 27</span> : }</span>
<span id="L28"><span class="lineNum"> 28</span> <span class="tlaGNC"> 704 : samples.index_put_({ -1, &quot;...&quot; }, torch::tensor(labels, torch::kInt32));</span></span>
<span id="L29"><span class="lineNum"> 29</span> <span class="tlaGNC"> 1584 : }</span></span>
<span id="L30"><span class="lineNum"> 30</span> <span class="tlaGNC"> 1099 : std::vector&lt;int&gt; Metrics::SelectKBestWeighted(const torch::Tensor&amp; weights, bool ascending, unsigned k)</span></span>
<span id="L28"><span class="lineNum"> 28</span> <span class="tlaGNC"> 384 : samples.index_put_({ -1, &quot;...&quot; }, torch::tensor(labels, torch::kInt32));</span></span>
<span id="L29"><span class="lineNum"> 29</span> <span class="tlaGNC"> 864 : }</span></span>
<span id="L30"><span class="lineNum"> 30</span> <span class="tlaGNC"> 690 : std::vector&lt;int&gt; Metrics::SelectKBestWeighted(const torch::Tensor&amp; weights, bool ascending, unsigned k)</span></span>
<span id="L31"><span class="lineNum"> 31</span> : {</span>
<span id="L32"><span class="lineNum"> 32</span> : // Return the K Best features </span>
<span id="L33"><span class="lineNum"> 33</span> <span class="tlaGNC"> 1099 : auto n = features.size();</span></span>
<span id="L34"><span class="lineNum"> 34</span> <span class="tlaGNC"> 1099 : if (k == 0) {</span></span>
<span id="L33"><span class="lineNum"> 33</span> <span class="tlaGNC"> 690 : auto n = features.size();</span></span>
<span id="L34"><span class="lineNum"> 34</span> <span class="tlaGNC"> 690 : if (k == 0) {</span></span>
<span id="L35"><span class="lineNum"> 35</span> <span class="tlaUNC tlaBgUNC"> 0 : k = n;</span></span>
<span id="L36"><span class="lineNum"> 36</span> : }</span>
<span id="L37"><span class="lineNum"> 37</span> : // compute scores</span>
<span id="L38"><span class="lineNum"> 38</span> <span class="tlaGNC tlaBgGNC"> 1099 : scoresKBest.clear();</span></span>
<span id="L39"><span class="lineNum"> 39</span> <span class="tlaGNC"> 1099 : featuresKBest.clear();</span></span>
<span id="L40"><span class="lineNum"> 40</span> <span class="tlaGNC"> 3297 : auto label = samples.index({ -1, &quot;...&quot; });</span></span>
<span id="L41"><span class="lineNum"> 41</span> <span class="tlaGNC"> 37425 : for (int i = 0; i &lt; n; ++i) {</span></span>
<span id="L42"><span class="lineNum"> 42</span> <span class="tlaGNC"> 108978 : scoresKBest.push_back(mutualInformation(label, samples.index({ i, &quot;...&quot; }), weights));</span></span>
<span id="L43"><span class="lineNum"> 43</span> <span class="tlaGNC"> 36326 : featuresKBest.push_back(i);</span></span>
<span id="L38"><span class="lineNum"> 38</span> <span class="tlaGNC tlaBgGNC"> 690 : scoresKBest.clear();</span></span>
<span id="L39"><span class="lineNum"> 39</span> <span class="tlaGNC"> 690 : featuresKBest.clear();</span></span>
<span id="L40"><span class="lineNum"> 40</span> <span class="tlaGNC"> 2070 : auto label = samples.index({ -1, &quot;...&quot; });</span></span>
<span id="L41"><span class="lineNum"> 41</span> <span class="tlaGNC"> 15576 : for (int i = 0; i &lt; n; ++i) {</span></span>
<span id="L42"><span class="lineNum"> 42</span> <span class="tlaGNC"> 44658 : scoresKBest.push_back(mutualInformation(label, samples.index({ i, &quot;...&quot; }), weights));</span></span>
<span id="L43"><span class="lineNum"> 43</span> <span class="tlaGNC"> 14886 : featuresKBest.push_back(i);</span></span>
<span id="L44"><span class="lineNum"> 44</span> : }</span>
<span id="L45"><span class="lineNum"> 45</span> : // sort &amp; reduce scores and features</span>
<span id="L46"><span class="lineNum"> 46</span> <span class="tlaGNC"> 1099 : if (ascending) {</span></span>
<span id="L47"><span class="lineNum"> 47</span> <span class="tlaGNC"> 245 : sort(featuresKBest.begin(), featuresKBest.end(), [&amp;](int i, int j)</span></span>
<span id="L48"><span class="lineNum"> 48</span> <span class="tlaGNC"> 5931 : { return scoresKBest[i] &lt; scoresKBest[j]; });</span></span>
<span id="L49"><span class="lineNum"> 49</span> <span class="tlaGNC"> 245 : sort(scoresKBest.begin(), scoresKBest.end(), std::less&lt;double&gt;());</span></span>
<span id="L50"><span class="lineNum"> 50</span> <span class="tlaGNC"> 245 : if (k &lt; n) {</span></span>
<span id="L51"><span class="lineNum"> 51</span> <span class="tlaGNC"> 308 : for (int i = 0; i &lt; n - k; ++i) {</span></span>
<span id="L52"><span class="lineNum"> 52</span> <span class="tlaGNC"> 220 : featuresKBest.erase(featuresKBest.begin());</span></span>
<span id="L53"><span class="lineNum"> 53</span> <span class="tlaGNC"> 220 : scoresKBest.erase(scoresKBest.begin());</span></span>
<span id="L46"><span class="lineNum"> 46</span> <span class="tlaGNC"> 690 : if (ascending) {</span></span>
<span id="L47"><span class="lineNum"> 47</span> <span class="tlaGNC"> 114 : sort(featuresKBest.begin(), featuresKBest.end(), [&amp;](int i, int j)</span></span>
<span id="L48"><span class="lineNum"> 48</span> <span class="tlaGNC"> 2718 : { return scoresKBest[i] &lt; scoresKBest[j]; });</span></span>
<span id="L49"><span class="lineNum"> 49</span> <span class="tlaGNC"> 114 : sort(scoresKBest.begin(), scoresKBest.end(), std::less&lt;double&gt;());</span></span>
<span id="L50"><span class="lineNum"> 50</span> <span class="tlaGNC"> 114 : if (k &lt; n) {</span></span>
<span id="L51"><span class="lineNum"> 51</span> <span class="tlaGNC"> 168 : for (int i = 0; i &lt; n - k; ++i) {</span></span>
<span id="L52"><span class="lineNum"> 52</span> <span class="tlaGNC"> 120 : featuresKBest.erase(featuresKBest.begin());</span></span>
<span id="L53"><span class="lineNum"> 53</span> <span class="tlaGNC"> 120 : scoresKBest.erase(scoresKBest.begin());</span></span>
<span id="L54"><span class="lineNum"> 54</span> : }</span>
<span id="L55"><span class="lineNum"> 55</span> : }</span>
<span id="L56"><span class="lineNum"> 56</span> : } else {</span>
<span id="L57"><span class="lineNum"> 57</span> <span class="tlaGNC"> 854 : sort(featuresKBest.begin(), featuresKBest.end(), [&amp;](int i, int j)</span></span>
<span id="L58"><span class="lineNum"> 58</span> <span class="tlaGNC"> 168709 : { return scoresKBest[i] &gt; scoresKBest[j]; });</span></span>
<span id="L59"><span class="lineNum"> 59</span> <span class="tlaGNC"> 854 : sort(scoresKBest.begin(), scoresKBest.end(), std::greater&lt;double&gt;());</span></span>
<span id="L60"><span class="lineNum"> 60</span> <span class="tlaGNC"> 854 : featuresKBest.resize(k);</span></span>
<span id="L61"><span class="lineNum"> 61</span> <span class="tlaGNC"> 854 : scoresKBest.resize(k);</span></span>
<span id="L57"><span class="lineNum"> 57</span> <span class="tlaGNC"> 576 : sort(featuresKBest.begin(), featuresKBest.end(), [&amp;](int i, int j)</span></span>
<span id="L58"><span class="lineNum"> 58</span> <span class="tlaGNC"> 97212 : { return scoresKBest[i] &gt; scoresKBest[j]; });</span></span>
<span id="L59"><span class="lineNum"> 59</span> <span class="tlaGNC"> 576 : sort(scoresKBest.begin(), scoresKBest.end(), std::greater&lt;double&gt;());</span></span>
<span id="L60"><span class="lineNum"> 60</span> <span class="tlaGNC"> 576 : featuresKBest.resize(k);</span></span>
<span id="L61"><span class="lineNum"> 61</span> <span class="tlaGNC"> 576 : scoresKBest.resize(k);</span></span>
<span id="L62"><span class="lineNum"> 62</span> : }</span>
<span id="L63"><span class="lineNum"> 63</span> <span class="tlaGNC"> 2198 : return featuresKBest;</span></span>
<span id="L64"><span class="lineNum"> 64</span> <span class="tlaGNC"> 38524 : }</span></span>
<span id="L65"><span class="lineNum"> 65</span> <span class="tlaGNC"> 88 : std::vector&lt;double&gt; Metrics::getScoresKBest() const</span></span>
<span id="L63"><span class="lineNum"> 63</span> <span class="tlaGNC"> 1380 : return featuresKBest;</span></span>
<span id="L64"><span class="lineNum"> 64</span> <span class="tlaGNC"> 16266 : }</span></span>
<span id="L65"><span class="lineNum"> 65</span> <span class="tlaGNC"> 48 : std::vector&lt;double&gt; Metrics::getScoresKBest() const</span></span>
<span id="L66"><span class="lineNum"> 66</span> : {</span>
<span id="L67"><span class="lineNum"> 67</span> <span class="tlaGNC"> 88 : return scoresKBest;</span></span>
<span id="L67"><span class="lineNum"> 67</span> <span class="tlaGNC"> 48 : return scoresKBest;</span></span>
<span id="L68"><span class="lineNum"> 68</span> : }</span>
<span id="L69"><span class="lineNum"> 69</span> : </span>
<span id="L70"><span class="lineNum"> 70</span> <span class="tlaGNC"> 374 : torch::Tensor Metrics::conditionalEdge(const torch::Tensor&amp; weights)</span></span>
<span id="L70"><span class="lineNum"> 70</span> <span class="tlaGNC"> 204 : torch::Tensor Metrics::conditionalEdge(const torch::Tensor&amp; weights)</span></span>
<span id="L71"><span class="lineNum"> 71</span> : {</span>
<span id="L72"><span class="lineNum"> 72</span> <span class="tlaGNC"> 374 : auto result = std::vector&lt;double&gt;();</span></span>
<span id="L73"><span class="lineNum"> 73</span> <span class="tlaGNC"> 374 : auto source = std::vector&lt;std::string&gt;(features);</span></span>
<span id="L74"><span class="lineNum"> 74</span> <span class="tlaGNC"> 374 : source.push_back(className);</span></span>
<span id="L75"><span class="lineNum"> 75</span> <span class="tlaGNC"> 374 : auto combinations = doCombinations(source);</span></span>
<span id="L72"><span class="lineNum"> 72</span> <span class="tlaGNC"> 204 : auto result = std::vector&lt;double&gt;();</span></span>
<span id="L73"><span class="lineNum"> 73</span> <span class="tlaGNC"> 204 : auto source = std::vector&lt;std::string&gt;(features);</span></span>
<span id="L74"><span class="lineNum"> 74</span> <span class="tlaGNC"> 204 : source.push_back(className);</span></span>
<span id="L75"><span class="lineNum"> 75</span> <span class="tlaGNC"> 204 : auto combinations = doCombinations(source);</span></span>
<span id="L76"><span class="lineNum"> 76</span> : // Compute class prior</span>
<span id="L77"><span class="lineNum"> 77</span> <span class="tlaGNC"> 374 : auto margin = torch::zeros({ classNumStates }, torch::kFloat);</span></span>
<span id="L78"><span class="lineNum"> 78</span> <span class="tlaGNC"> 2024 : for (int value = 0; value &lt; classNumStates; ++value) {</span></span>
<span id="L79"><span class="lineNum"> 79</span> <span class="tlaGNC"> 6600 : auto mask = samples.index({ -1, &quot;...&quot; }) == value;</span></span>
<span id="L80"><span class="lineNum"> 80</span> <span class="tlaGNC"> 1650 : margin[value] = mask.sum().item&lt;double&gt;() / samples.size(1);</span></span>
<span id="L81"><span class="lineNum"> 81</span> <span class="tlaGNC"> 1650 : }</span></span>
<span id="L82"><span class="lineNum"> 82</span> <span class="tlaGNC"> 10098 : for (auto [first, second] : combinations) {</span></span>
<span id="L83"><span class="lineNum"> 83</span> <span class="tlaGNC"> 9724 : int index_first = find(features.begin(), features.end(), first) - features.begin();</span></span>
<span id="L84"><span class="lineNum"> 84</span> <span class="tlaGNC"> 9724 : int index_second = find(features.begin(), features.end(), second) - features.begin();</span></span>
<span id="L85"><span class="lineNum"> 85</span> <span class="tlaGNC"> 9724 : double accumulated = 0;</span></span>
<span id="L86"><span class="lineNum"> 86</span> <span class="tlaGNC"> 57640 : for (int value = 0; value &lt; classNumStates; ++value) {</span></span>
<span id="L87"><span class="lineNum"> 87</span> <span class="tlaGNC"> 191664 : auto mask = samples.index({ -1, &quot;...&quot; }) == value;</span></span>
<span id="L88"><span class="lineNum"> 88</span> <span class="tlaGNC"> 143748 : auto first_dataset = samples.index({ index_first, mask });</span></span>
<span id="L89"><span class="lineNum"> 89</span> <span class="tlaGNC"> 143748 : auto second_dataset = samples.index({ index_second, mask });</span></span>
<span id="L90"><span class="lineNum"> 90</span> <span class="tlaGNC"> 95832 : auto weights_dataset = weights.index({ mask });</span></span>
<span id="L91"><span class="lineNum"> 91</span> <span class="tlaGNC"> 95832 : auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);</span></span>
<span id="L92"><span class="lineNum"> 92</span> <span class="tlaGNC"> 47916 : auto pb = margin[value].item&lt;double&gt;();</span></span>
<span id="L93"><span class="lineNum"> 93</span> <span class="tlaGNC"> 47916 : accumulated += pb * mi;</span></span>
<span id="L94"><span class="lineNum"> 94</span> <span class="tlaGNC"> 47916 : }</span></span>
<span id="L95"><span class="lineNum"> 95</span> <span class="tlaGNC"> 9724 : result.push_back(accumulated);</span></span>
<span id="L96"><span class="lineNum"> 96</span> <span class="tlaGNC"> 9724 : }</span></span>
<span id="L97"><span class="lineNum"> 97</span> <span class="tlaGNC"> 374 : long n_vars = source.size();</span></span>
<span id="L98"><span class="lineNum"> 98</span> <span class="tlaGNC"> 374 : auto matrix = torch::zeros({ n_vars, n_vars });</span></span>
<span id="L99"><span class="lineNum"> 99</span> <span class="tlaGNC"> 374 : auto indices = torch::triu_indices(n_vars, n_vars, 1);</span></span>
<span id="L100"><span class="lineNum"> 100</span> <span class="tlaGNC"> 10098 : for (auto i = 0; i &lt; result.size(); ++i) {</span></span>
<span id="L101"><span class="lineNum"> 101</span> <span class="tlaGNC"> 9724 : auto x = indices[0][i];</span></span>
<span id="L102"><span class="lineNum"> 102</span> <span class="tlaGNC"> 9724 : auto y = indices[1][i];</span></span>
<span id="L103"><span class="lineNum"> 103</span> <span class="tlaGNC"> 9724 : matrix[x][y] = result[i];</span></span>
<span id="L104"><span class="lineNum"> 104</span> <span class="tlaGNC"> 9724 : matrix[y][x] = result[i];</span></span>
<span id="L105"><span class="lineNum"> 105</span> <span class="tlaGNC"> 9724 : }</span></span>
<span id="L106"><span class="lineNum"> 106</span> <span class="tlaGNC"> 748 : return matrix;</span></span>
<span id="L107"><span class="lineNum"> 107</span> <span class="tlaGNC"> 241604 : }</span></span>
<span id="L108"><span class="lineNum"> 108</span> : // To use in Python</span>
<span id="L109"><span class="lineNum"> 109</span> <span class="tlaUNC tlaBgUNC"> 0 : std::vector&lt;float&gt; Metrics::conditionalEdgeWeights(std::vector&lt;float&gt;&amp; weights_)</span></span>
<span id="L110"><span class="lineNum"> 110</span> : {</span>
<span id="L111"><span class="lineNum"> 111</span> <span class="tlaUNC"> 0 : const torch::Tensor weights = torch::tensor(weights_);</span></span>
<span id="L112"><span class="lineNum"> 112</span> <span class="tlaUNC"> 0 : auto matrix = conditionalEdge(weights);</span></span>
<span id="L113"><span class="lineNum"> 113</span> <span class="tlaUNC"> 0 : std::vector&lt;float&gt; v(matrix.data_ptr&lt;float&gt;(), matrix.data_ptr&lt;float&gt;() + matrix.numel());</span></span>
<span id="L114"><span class="lineNum"> 114</span> <span class="tlaUNC"> 0 : return v;</span></span>
<span id="L115"><span class="lineNum"> 115</span> <span class="tlaUNC"> 0 : }</span></span>
<span id="L116"><span class="lineNum"> 116</span> <span class="tlaGNC tlaBgGNC"> 101565 : double Metrics::entropy(const torch::Tensor&amp; feature, const torch::Tensor&amp; weights)</span></span>
<span id="L117"><span class="lineNum"> 117</span> : {</span>
<span id="L118"><span class="lineNum"> 118</span> <span class="tlaGNC"> 101565 : torch::Tensor counts = feature.bincount(weights);</span></span>
<span id="L119"><span class="lineNum"> 119</span> <span class="tlaGNC"> 101565 : double totalWeight = counts.sum().item&lt;double&gt;();</span></span>
<span id="L120"><span class="lineNum"> 120</span> <span class="tlaGNC"> 101565 : torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;</span></span>
<span id="L121"><span class="lineNum"> 121</span> <span class="tlaGNC"> 101565 : torch::Tensor logProbs = torch::log(probs);</span></span>
<span id="L122"><span class="lineNum"> 122</span> <span class="tlaGNC"> 101565 : torch::Tensor entropy = -probs * logProbs;</span></span>
<span id="L123"><span class="lineNum"> 123</span> <span class="tlaGNC"> 203130 : return entropy.nansum().item&lt;double&gt;();</span></span>
<span id="L124"><span class="lineNum"> 124</span> <span class="tlaGNC"> 101565 : }</span></span>
<span id="L125"><span class="lineNum"> 125</span> : // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)</span>
<span id="L126"><span class="lineNum"> 126</span> <span class="tlaGNC"> 91263 : double Metrics::conditionalEntropy(const torch::Tensor&amp; firstFeature, const torch::Tensor&amp; secondFeature, const torch::Tensor&amp; weights)</span></span>
<span id="L127"><span class="lineNum"> 127</span> : {</span>
<span id="L128"><span class="lineNum"> 128</span> <span class="tlaGNC"> 91263 : int numSamples = firstFeature.sizes()[0];</span></span>
<span id="L129"><span class="lineNum"> 129</span> <span class="tlaGNC"> 91263 : torch::Tensor featureCounts = secondFeature.bincount(weights);</span></span>
<span id="L130"><span class="lineNum"> 130</span> <span class="tlaGNC"> 91263 : std::unordered_map&lt;int, std::unordered_map&lt;int, double&gt;&gt; jointCounts;</span></span>
<span id="L131"><span class="lineNum"> 131</span> <span class="tlaGNC"> 91263 : double totalWeight = 0;</span></span>
<span id="L132"><span class="lineNum"> 132</span> <span class="tlaGNC"> 11715815 : for (auto i = 0; i &lt; numSamples; i++) {</span></span>
<span id="L133"><span class="lineNum"> 133</span> <span class="tlaGNC"> 11624552 : jointCounts[secondFeature[i].item&lt;int&gt;()][firstFeature[i].item&lt;int&gt;()] += weights[i].item&lt;double&gt;();</span></span>
<span id="L134"><span class="lineNum"> 134</span> <span class="tlaGNC"> 11624552 : totalWeight += weights[i].item&lt;float&gt;();</span></span>
<span id="L135"><span class="lineNum"> 135</span> : }</span>
<span id="L136"><span class="lineNum"> 136</span> <span class="tlaGNC"> 91263 : if (totalWeight == 0)</span></span>
<span id="L137"><span class="lineNum"> 137</span> <span class="tlaUNC tlaBgUNC"> 0 : return 0;</span></span>
<span id="L138"><span class="lineNum"> 138</span> <span class="tlaGNC tlaBgGNC"> 91263 : double entropyValue = 0;</span></span>
<span id="L139"><span class="lineNum"> 139</span> <span class="tlaGNC"> 311456 : for (int value = 0; value &lt; featureCounts.sizes()[0]; ++value) {</span></span>
<span id="L140"><span class="lineNum"> 140</span> <span class="tlaGNC"> 220193 : double p_f = featureCounts[value].item&lt;double&gt;() / totalWeight;</span></span>
<span id="L141"><span class="lineNum"> 141</span> <span class="tlaGNC"> 220193 : double entropy_f = 0;</span></span>
<span id="L142"><span class="lineNum"> 142</span> <span class="tlaGNC"> 655015 : for (auto&amp; [label, jointCount] : jointCounts[value]) {</span></span>
<span id="L143"><span class="lineNum"> 143</span> <span class="tlaGNC"> 434822 : double p_l_f = jointCount / featureCounts[value].item&lt;double&gt;();</span></span>
<span id="L144"><span class="lineNum"> 144</span> <span class="tlaGNC"> 434822 : if (p_l_f &gt; 0) {</span></span>
<span id="L145"><span class="lineNum"> 145</span> <span class="tlaGNC"> 434822 : entropy_f -= p_l_f * log(p_l_f);</span></span>
<span id="L146"><span class="lineNum"> 146</span> : } else {</span>
<span id="L147"><span class="lineNum"> 147</span> <span class="tlaUNC tlaBgUNC"> 0 : entropy_f = 0;</span></span>
<span id="L148"><span class="lineNum"> 148</span> : }</span>
<span id="L149"><span class="lineNum"> 149</span> : }</span>
<span id="L150"><span class="lineNum"> 150</span> <span class="tlaGNC tlaBgGNC"> 220193 : entropyValue += p_f * entropy_f;</span></span>
<span id="L151"><span class="lineNum"> 151</span> : }</span>
<span id="L152"><span class="lineNum"> 152</span> <span class="tlaGNC"> 91263 : return entropyValue;</span></span>
<span id="L153"><span class="lineNum"> 153</span> <span class="tlaGNC"> 91263 : }</span></span>
<span id="L154"><span class="lineNum"> 154</span> : // I(X;Y) = H(Y) - H(Y|X)</span>
<span id="L155"><span class="lineNum"> 155</span> <span class="tlaGNC"> 91263 : double Metrics::mutualInformation(const torch::Tensor&amp; firstFeature, const torch::Tensor&amp; secondFeature, const torch::Tensor&amp; weights)</span></span>
<span id="L156"><span class="lineNum"> 156</span> : {</span>
<span id="L157"><span class="lineNum"> 157</span> <span class="tlaGNC"> 91263 : return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);</span></span>
<span id="L158"><span class="lineNum"> 158</span> : }</span>
<span id="L159"><span class="lineNum"> 159</span> : /*</span>
<span id="L160"><span class="lineNum"> 160</span> : Compute the maximum spanning tree considering the weights as distances</span>
<span id="L161"><span class="lineNum"> 161</span> : and the indices of the weights as nodes of this square matrix using</span>
<span id="L162"><span class="lineNum"> 162</span> : Kruskal algorithm</span>
<span id="L163"><span class="lineNum"> 163</span> : */</span>
<span id="L164"><span class="lineNum"> 164</span> <span class="tlaGNC"> 319 : std::vector&lt;std::pair&lt;int, int&gt;&gt; Metrics::maximumSpanningTree(const std::vector&lt;std::string&gt;&amp; features, const torch::Tensor&amp; weights, const int root)</span></span>
<span id="L165"><span class="lineNum"> 165</span> : {</span>
<span id="L166"><span class="lineNum"> 166</span> <span class="tlaGNC"> 319 : auto mst = MST(features, weights, root);</span></span>
<span id="L167"><span class="lineNum"> 167</span> <span class="tlaGNC"> 638 : return mst.maximumSpanningTree();</span></span>
<span id="L168"><span class="lineNum"> 168</span> <span class="tlaGNC"> 319 : }</span></span>
<span id="L169"><span class="lineNum"> 169</span> : }</span>
<span id="L77"><span class="lineNum"> 77</span> <span class="tlaGNC"> 204 : auto margin = torch::zeros({ classNumStates }, torch::kFloat);</span></span>
<span id="L78"><span class="lineNum"> 78</span> <span class="tlaGNC"> 1104 : for (int value = 0; value &lt; classNumStates; ++value) {</span></span>
<span id="L79"><span class="lineNum"> 79</span> <span class="tlaGNC"> 3600 : auto mask = samples.index({ -1, &quot;...&quot; }) == value;</span></span>
<span id="L80"><span class="lineNum"> 80</span> <span class="tlaGNC"> 900 : margin[value] = mask.sum().item&lt;double&gt;() / samples.size(1);</span></span>
<span id="L81"><span class="lineNum"> 81</span> <span class="tlaGNC"> 900 : }</span></span>
<span id="L82"><span class="lineNum"> 82</span> <span class="tlaGNC"> 5508 : for (auto [first, second] : combinations) {</span></span>
<span id="L83"><span class="lineNum"> 83</span> <span class="tlaGNC"> 5304 : int index_first = find(features.begin(), features.end(), first) - features.begin();</span></span>
<span id="L84"><span class="lineNum"> 84</span> <span class="tlaGNC"> 5304 : int index_second = find(features.begin(), features.end(), second) - features.begin();</span></span>
<span id="L85"><span class="lineNum"> 85</span> <span class="tlaGNC"> 5304 : double accumulated = 0;</span></span>
<span id="L86"><span class="lineNum"> 86</span> <span class="tlaGNC"> 31440 : for (int value = 0; value &lt; classNumStates; ++value) {</span></span>
<span id="L87"><span class="lineNum"> 87</span> <span class="tlaGNC"> 104544 : auto mask = samples.index({ -1, &quot;...&quot; }) == value;</span></span>
<span id="L88"><span class="lineNum"> 88</span> <span class="tlaGNC"> 78408 : auto first_dataset = samples.index({ index_first, mask });</span></span>
<span id="L89"><span class="lineNum"> 89</span> <span class="tlaGNC"> 78408 : auto second_dataset = samples.index({ index_second, mask });</span></span>
<span id="L90"><span class="lineNum"> 90</span> <span class="tlaGNC"> 52272 : auto weights_dataset = weights.index({ mask });</span></span>
<span id="L91"><span class="lineNum"> 91</span> <span class="tlaGNC"> 52272 : auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);</span></span>
<span id="L92"><span class="lineNum"> 92</span> <span class="tlaGNC"> 26136 : auto pb = margin[value].item&lt;double&gt;();</span></span>
<span id="L93"><span class="lineNum"> 93</span> <span class="tlaGNC"> 26136 : accumulated += pb * mi;</span></span>
<span id="L94"><span class="lineNum"> 94</span> <span class="tlaGNC"> 26136 : }</span></span>
<span id="L95"><span class="lineNum"> 95</span> <span class="tlaGNC"> 5304 : result.push_back(accumulated);</span></span>
<span id="L96"><span class="lineNum"> 96</span> <span class="tlaGNC"> 5304 : }</span></span>
<span id="L97"><span class="lineNum"> 97</span> <span class="tlaGNC"> 204 : long n_vars = source.size();</span></span>
<span id="L98"><span class="lineNum"> 98</span> <span class="tlaGNC"> 204 : auto matrix = torch::zeros({ n_vars, n_vars });</span></span>
<span id="L99"><span class="lineNum"> 99</span> <span class="tlaGNC"> 204 : auto indices = torch::triu_indices(n_vars, n_vars, 1);</span></span>
<span id="L100"><span class="lineNum"> 100</span> <span class="tlaGNC"> 5508 : for (auto i = 0; i &lt; result.size(); ++i) {</span></span>
<span id="L101"><span class="lineNum"> 101</span> <span class="tlaGNC"> 5304 : auto x = indices[0][i];</span></span>
<span id="L102"><span class="lineNum"> 102</span> <span class="tlaGNC"> 5304 : auto y = indices[1][i];</span></span>
<span id="L103"><span class="lineNum"> 103</span> <span class="tlaGNC"> 5304 : matrix[x][y] = result[i];</span></span>
<span id="L104"><span class="lineNum"> 104</span> <span class="tlaGNC"> 5304 : matrix[y][x] = result[i];</span></span>
<span id="L105"><span class="lineNum"> 105</span> <span class="tlaGNC"> 5304 : }</span></span>
<span id="L106"><span class="lineNum"> 106</span> <span class="tlaGNC"> 408 : return matrix;</span></span>
<span id="L107"><span class="lineNum"> 107</span> <span class="tlaGNC"> 131784 : }</span></span>
<span id="L108"><span class="lineNum"> 108</span> <span class="tlaGNC"> 50295 : double Metrics::entropy(const torch::Tensor&amp; feature, const torch::Tensor&amp; weights)</span></span>
<span id="L109"><span class="lineNum"> 109</span> : {</span>
<span id="L110"><span class="lineNum"> 110</span> <span class="tlaGNC"> 50295 : torch::Tensor counts = feature.bincount(weights);</span></span>
<span id="L111"><span class="lineNum"> 111</span> <span class="tlaGNC"> 50295 : double totalWeight = counts.sum().item&lt;double&gt;();</span></span>
<span id="L112"><span class="lineNum"> 112</span> <span class="tlaGNC"> 50295 : torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;</span></span>
<span id="L113"><span class="lineNum"> 113</span> <span class="tlaGNC"> 50295 : torch::Tensor logProbs = torch::log(probs);</span></span>
<span id="L114"><span class="lineNum"> 114</span> <span class="tlaGNC"> 50295 : torch::Tensor entropy = -probs * logProbs;</span></span>
<span id="L115"><span class="lineNum"> 115</span> <span class="tlaGNC"> 100590 : return entropy.nansum().item&lt;double&gt;();</span></span>
<span id="L116"><span class="lineNum"> 116</span> <span class="tlaGNC"> 50295 : }</span></span>
<span id="L117"><span class="lineNum"> 117</span> : // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)</span>
<span id="L118"><span class="lineNum"> 118</span> <span class="tlaGNC"> 44793 : double Metrics::conditionalEntropy(const torch::Tensor&amp; firstFeature, const torch::Tensor&amp; secondFeature, const torch::Tensor&amp; weights)</span></span>
<span id="L119"><span class="lineNum"> 119</span> : {</span>
<span id="L120"><span class="lineNum"> 120</span> <span class="tlaGNC"> 44793 : int numSamples = firstFeature.sizes()[0];</span></span>
<span id="L121"><span class="lineNum"> 121</span> <span class="tlaGNC"> 44793 : torch::Tensor featureCounts = secondFeature.bincount(weights);</span></span>
<span id="L122"><span class="lineNum"> 122</span> <span class="tlaGNC"> 44793 : std::unordered_map&lt;int, std::unordered_map&lt;int, double&gt;&gt; jointCounts;</span></span>
<span id="L123"><span class="lineNum"> 123</span> <span class="tlaGNC"> 44793 : double totalWeight = 0;</span></span>
<span id="L124"><span class="lineNum"> 124</span> <span class="tlaGNC"> 8954403 : for (auto i = 0; i &lt; numSamples; i++) {</span></span>
<span id="L125"><span class="lineNum"> 125</span> <span class="tlaGNC"> 8909610 : jointCounts[secondFeature[i].item&lt;int&gt;()][firstFeature[i].item&lt;int&gt;()] += weights[i].item&lt;double&gt;();</span></span>
<span id="L126"><span class="lineNum"> 126</span> <span class="tlaGNC"> 8909610 : totalWeight += weights[i].item&lt;float&gt;();</span></span>
<span id="L127"><span class="lineNum"> 127</span> : }</span>
<span id="L128"><span class="lineNum"> 128</span> <span class="tlaGNC"> 44793 : if (totalWeight == 0)</span></span>
<span id="L129"><span class="lineNum"> 129</span> <span class="tlaUNC tlaBgUNC"> 0 : return 0;</span></span>
<span id="L130"><span class="lineNum"> 130</span> <span class="tlaGNC tlaBgGNC"> 44793 : double entropyValue = 0;</span></span>
<span id="L131"><span class="lineNum"> 131</span> <span class="tlaGNC"> 222747 : for (int value = 0; value &lt; featureCounts.sizes()[0]; ++value) {</span></span>
<span id="L132"><span class="lineNum"> 132</span> <span class="tlaGNC"> 177954 : double p_f = featureCounts[value].item&lt;double&gt;() / totalWeight;</span></span>
<span id="L133"><span class="lineNum"> 133</span> <span class="tlaGNC"> 177954 : double entropy_f = 0;</span></span>
<span id="L134"><span class="lineNum"> 134</span> <span class="tlaGNC"> 601680 : for (auto&amp; [label, jointCount] : jointCounts[value]) {</span></span>
<span id="L135"><span class="lineNum"> 135</span> <span class="tlaGNC"> 423726 : double p_l_f = jointCount / featureCounts[value].item&lt;double&gt;();</span></span>
<span id="L136"><span class="lineNum"> 136</span> <span class="tlaGNC"> 423726 : if (p_l_f &gt; 0) {</span></span>
<span id="L137"><span class="lineNum"> 137</span> <span class="tlaGNC"> 423726 : entropy_f -= p_l_f * log(p_l_f);</span></span>
<span id="L138"><span class="lineNum"> 138</span> : } else {</span>
<span id="L139"><span class="lineNum"> 139</span> <span class="tlaUNC tlaBgUNC"> 0 : entropy_f = 0;</span></span>
<span id="L140"><span class="lineNum"> 140</span> : }</span>
<span id="L141"><span class="lineNum"> 141</span> : }</span>
<span id="L142"><span class="lineNum"> 142</span> <span class="tlaGNC tlaBgGNC"> 177954 : entropyValue += p_f * entropy_f;</span></span>
<span id="L143"><span class="lineNum"> 143</span> : }</span>
<span id="L144"><span class="lineNum"> 144</span> <span class="tlaGNC"> 44793 : return entropyValue;</span></span>
<span id="L145"><span class="lineNum"> 145</span> <span class="tlaGNC"> 44793 : }</span></span>
<span id="L146"><span class="lineNum"> 146</span> : // I(X;Y) = H(Y) - H(Y|X)</span>
<span id="L147"><span class="lineNum"> 147</span> <span class="tlaGNC"> 44793 : double Metrics::mutualInformation(const torch::Tensor&amp; firstFeature, const torch::Tensor&amp; secondFeature, const torch::Tensor&amp; weights)</span></span>
<span id="L148"><span class="lineNum"> 148</span> : {</span>
<span id="L149"><span class="lineNum"> 149</span> <span class="tlaGNC"> 44793 : return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);</span></span>
<span id="L150"><span class="lineNum"> 150</span> : }</span>
<span id="L151"><span class="lineNum"> 151</span> : /*</span>
<span id="L152"><span class="lineNum"> 152</span> : Compute the maximum spanning tree considering the weights as distances</span>
<span id="L153"><span class="lineNum"> 153</span> : and the indices of the weights as nodes of this square matrix using</span>
<span id="L154"><span class="lineNum"> 154</span> : Kruskal algorithm</span>
<span id="L155"><span class="lineNum"> 155</span> : */</span>
<span id="L156"><span class="lineNum"> 156</span> <span class="tlaGNC"> 174 : std::vector&lt;std::pair&lt;int, int&gt;&gt; Metrics::maximumSpanningTree(const std::vector&lt;std::string&gt;&amp; features, const torch::Tensor&amp; weights, const int root)</span></span>
<span id="L157"><span class="lineNum"> 157</span> : {</span>
<span id="L158"><span class="lineNum"> 158</span> <span class="tlaGNC"> 174 : auto mst = MST(features, weights, root);</span></span>
<span id="L159"><span class="lineNum"> 159</span> <span class="tlaGNC"> 348 : return mst.maximumSpanningTree();</span></span>
<span id="L160"><span class="lineNum"> 160</span> <span class="tlaGNC"> 174 : }</span></span>
<span id="L161"><span class="lineNum"> 161</span> : }</span>
</pre>
</td>
</tr>