Skip to content

Commit f3bccff

Browse files
authored
Merge pull request #185 from Mdwiki-TD/update_new
improve results
2 parents 635647d + 9eab1c8 commit f3bccff

28 files changed

+628
-268
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,4 @@ run.sh
5454
/src/.claude
5555
src/Plan.md
5656
tt.md
57+
src/get_results_api.php

src/backend/loaders/load_request.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ function load_request()
2121
$test = htmlspecialchars($_GET['test'] ?? '', ENT_QUOTES, 'UTF-8');
2222
$doit = htmlspecialchars($_GET['doit'] ?? '', ENT_QUOTES, 'UTF-8');
2323
$code = htmlspecialchars($_GET['code'] ?? '', ENT_QUOTES, 'UTF-8');
24+
$filter_sparql = !empty($_GET['filter_sparql'] ?? '') ? true : false;
2425
//---
2526
if ($code == 'undefined') $code = "";
2627
//---
@@ -54,6 +55,7 @@ function load_request()
5455
'cat' => $cat,
5556
'camp' => $camp,
5657
'tra_type' => $tra_type,
57-
'code_lang_name' => $code_lang_name
58+
'filter_sparql' => $filter_sparql,
59+
'code_lang_name' => $code_lang_name,
5860
];
5961
}

src/backend/results/fetch_cat_data.php

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,36 +11,33 @@
1111

1212
use function Results\GetCats\get_mdwiki_cat_members;
1313
use function TD\Render\TestPrint\test_print;
14-
use function Results\ResultsHelps\get_lang_exists_pages;
14+
use function Results\ResultsHelps\get_lang_exists_pages_from_cache;
1515

1616
function get_cat_exists_and_missing($cat, $depth, $code, $use_cache = true): array
1717
{
1818
// Fetch category members
1919
// $members = get_mdwiki_cat_members($cat, $use_cache = $use_cache, $depth = $depth, $camp = $camp);
2020
$members = get_mdwiki_cat_members($cat, $depth, $use_cache);
2121

22-
test_print("get_cat_exists_and_missing Members size: " . count($members));
22+
test_print("get_cat_exists and_missing Members size: " . count($members));
2323

24-
$exists = get_lang_exists_pages($code);
24+
$exists = get_lang_exists_pages_from_cache($code);
2525
// ---
2626
// pages that exist in $exists and $members
2727
$exists = array_intersect($members, $exists);
2828
// ---
29-
// ---
30-
// var_dump($exists);
29+
// change from ("{"6":"Video:Cancer"}") to (["Video:Cancer"])
30+
// $exists = array_values($exists);
3131
// ---
3232
// Find missing members
3333
$missing = array_diff($members, $exists);
3434

3535
$missing = array_unique($missing);
36-
37-
// Calculate the length of existing pages
38-
$exs_len = count($members) - count($missing);
39-
40-
// test_print("End of get_cat_exists_and_missing <br>===============================");
36+
// ---
37+
// ---
38+
// test_print("End of get_cat exists_and_missing <br>===============================");
4139

4240
return [
43-
"len_of_exists" => $exs_len,
4441
"missing" => $missing,
4542
"exists" => $exists,
4643
];

src/backend/results/get_results.php

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,20 @@
55
/*
66
Usage:
77
8-
use function Results\GetResults\get_results;
8+
use function Results\GetResults\get_results; // get_results($cat, $camp, $depth, $code)
99
1010
*/
1111

1212
use Tables\SqlTables\TablesSql;
13-
// use function Results\FetchCatData\get_cat_exists_and_missing;
14-
use function Results\FetchCatDataSparql\get_cat_exists_and_missing;
15-
use function Results\GetCats\get_mdwiki_cat_members;
13+
use function Results\FetchCatData\get_cat_exists_and_missing;
14+
use function Results\SparqlBot\filter_existing_out;
1615
use function TD\Render\TestPrint\test_print;
1716
use function SQLorAPI\Process\get_lang_in_process_new;
17+
use function SQLorAPI\Funcs\get_lang_pages_by_cat;
18+
use function Results\ResultsHelps\make_exists_targets;
19+
use function Results\ResultsHelps\filter_items_missing_cat2;
20+
use function Results\ResultsHelps\create_summary;
21+
1822

1923
function getinprocess($missing, $code)
2024
{
@@ -26,26 +30,31 @@ function getinprocess($missing, $code)
2630
return $titles;
2731
}
2832

29-
function get_results($cat, $camp, $depth, $code): array
33+
function get_results($cat, $camp, $depth, $code, $filter_sparql): array
3034
{
3135
// Get existing and missing pages
3236
// ---
33-
$items = get_cat_exists_and_missing($cat, $depth, $code, true) ?: [];
37+
$targets_via_td = get_lang_pages_by_cat($code, $cat);
38+
//---
39+
$items = get_cat_exists_and_missing($cat, $depth, $code, true);
3440
// ---
35-
$len_of_exists_pages = $items['len_of_exists'];
3641
$items_missing = $items['missing'];
37-
3842
$items_exists = $items['exists'];
39-
43+
$len_of_exists_pages = count($items_exists);
44+
// ---
45+
if (!empty($filter_sparql)) {
46+
[$items_exists, $items_missing] = filter_existing_out($items_missing, $items_exists, $code);
47+
}
48+
// ---
49+
$items_exists = make_exists_targets($targets_via_td, $items_exists, $code, $cat);
50+
// ---
4051
test_print("Items missing: " . count($items_missing));
4152

4253
// Check for a secondary category
4354
$cat2 = TablesSql::$s_camps_cat2[$camp] ?? '';
55+
4456
if (!empty($cat2) && $cat2 !== $cat) {
45-
// $cat2_members = get_mdwiki_cat_members($cat2, $use_cache = true, $depth = $depth, $camp = $camp);
46-
$cat2_members = get_mdwiki_cat_members($cat2, $depth, true);
47-
$items_missing = array_intersect($items_missing, $cat2_members);
48-
test_print("Items missing after intersecting with cat2: " . count($items_missing));
57+
$items_missing = filter_items_missing_cat2($items_missing, $cat2, $depth);
4958
}
5059

5160
test_print("Length of existing pages: $len_of_exists_pages");
@@ -57,27 +66,18 @@ function get_results($cat, $camp, $depth, $code): array
5766
$inprocess = getinprocess($missing, $code);
5867
$len_inprocess = count($inprocess);
5968

60-
// Calculate totals
61-
$len_of_missing_pages = count($missing);
62-
$len_of_all = $len_of_exists_pages + $len_of_missing_pages;
63-
64-
// Prepare category URL
65-
$cat2 = "Category:" . str_replace('Category:', '', $cat);
66-
$caturl = "<a href='https://mdwiki.org/wiki/$cat2' target='_blank'>category</a>";
67-
68-
// Generate summary message
69-
$ix = "Found $len_of_all pages in $caturl, $len_of_exists_pages exists, and $len_of_missing_pages missing in (<a href='https://$code.wikipedia.org' target='_blank'>$code</a>), $len_inprocess In process.";
70-
7169
// Remove in-process items from missing list
7270
if ($len_inprocess > 0) {
7371
$inprocess_2 = array_column($inprocess, 'title');
7472
$missing = array_diff($missing, $inprocess_2);
7573
}
7674

75+
$summary = create_summary($code, $cat, $inprocess, $missing, $len_of_exists_pages);
76+
7777
return [
7878
"inprocess" => $inprocess,
7979
"exists" => $items_exists,
8080
"missing" => $missing,
81-
"ix" => $ix,
81+
"ix" => $summary,
8282
];
8383
}

src/backend/results/helps.php

Lines changed: 87 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,19 @@
55
/*
66
Usage:
77
8-
use function Results\ResultsHelps\get_lang_exists_pages;
8+
use function Results\ResultsHelps\get_lang_exists_pages_from_cache;
99
use function Results\ResultsHelps\open_json_file;
10+
use function Results\ResultsHelps\make_exists_targets;
11+
use function Results\ResultsHelps\filter_items_missing_cat2;
12+
use function Results\ResultsHelps\create_summary;
1013
1114
*/
1215

1316
use function TD\Render\TestPrint\test_print;
1417
use function Tables\TablesDir\open_td_Tables_file;
18+
use function SQLorAPI\Funcs\exists_by_qids_query;
19+
use function Results\GetCats\get_mdwiki_cat_members;
20+
use function TD\Render\Html\make_mdwiki_cat_url;
1521

1622
function open_json_file($file_path)
1723
{
@@ -35,10 +41,89 @@ function open_json_file($file_path)
3541
return $data;
3642
}
3743

38-
function get_lang_exists_pages($code)
44+
/**
 * Load the cached "exists" list for one language.
 *
 * Builds the relative path "cash_exists/<code>.json" and hands it to
 * open_td_Tables_file(); whatever that helper returns is passed back as-is.
 *
 * Example of a result: [ "Spontaneous bacterial peritonitis", "Dronedarone", ... ]
 *
 * @param string $code Language code, interpolated into the cache file name.
 * @return mixed Content produced by open_td_Tables_file() — presumably a list
 *               of mdwiki titles; confirm against that helper.
 */
function get_lang_exists_pages_from_cache($code)
{
    return open_td_Tables_file("cash_exists/$code.json");
}
53+
54+
55+
/**
 * Describe, for every existing title, which source supplied its target page.
 *
 * Two lookups are consulted, both re-indexed by their 'title' column:
 *  - $targets_via_td: rows passed in by the caller, e.g.
 *    { "id": 6982, "title": "Video:Abdominal thrusts", "lang": "ar",
 *      "user": "Mr. Ibrahem", "target": "...", "pupdate": "2025-02-06", ... }
 *  - exists_by_qids_query($code, $cat): rows such as
 *    { "qid": "Q133005500", "title": "Video:Abdominal thrusts",
 *      "category": "RTTVideo", "code": "ar", "target": "..." }
 *
 * Each entry of the result has the keys "qid", "title", "target" and "via"
 * ("td", "before" or "none"); entries resolved through the TD rows also carry
 * "pupdate" and "user".
 *
 * @param array  $targets_via_td Rows containing at least a 'title' column.
 * @param array  $exists         Titles to describe (array keys are ignored).
 * @param string $code           Language code forwarded to exists_by_qids_query().
 * @param string $cat            Category forwarded to exists_by_qids_query().
 * @return array Map of title => description as outlined above.
 */
function make_exists_targets($targets_via_td, $exists, $code, $cat)
{
    // Index both sources by title so each page needs a single lookup.
    $targets_via_td = array_column($targets_via_td, null, 'title');
    $exists_targets_before = array_column(exists_by_qids_query($code, $cat), null, 'title');

    $tab = [];

    foreach ($exists as $title) {
        $td_link = $targets_via_td[$title] ?? [];
        $before_link = $exists_targets_before[$title] ?? [];

        $td_target = $td_link['target'] ?? "";
        $before_target = $before_link['target'] ?? "";

        $one_tab = [];
        $one_tab["qid"] = $td_link['qid'] ?? $before_link['qid'] ?? "";
        // The title is always known here, so never emit an empty one.
        $one_tab["title"] = $td_link['title'] ?? $before_link['title'] ?? $title;
        // Pick the target with the same truthiness rule that decides "via":
        // a TD row whose target is an empty string must not hide the target
        // supplied by the other source.
        $one_tab["target"] = $td_target ?: $before_target;

        if ($td_target) {
            $one_tab["via"] = "td";
            $one_tab["pupdate"] = $td_link['pupdate'] ?? "";
            $one_tab["user"] = $td_link['user'] ?? "";
        } elseif ($before_target) {
            $one_tab["via"] = "before";
        } else {
            $one_tab["via"] = "none";
        }

        $tab[$title] = $one_tab;
    }

    return $tab;
}
98+
99+
100+
/**
 * Restrict the missing titles to those that also belong to a second category.
 *
 * Fetches the members of $cat2 via get_mdwiki_cat_members($cat2, $depth, true),
 * intersects them with $items_missing (array_intersect keeps the keys of
 * $items_missing), reports the remaining count through test_print() and
 * returns the intersection.
 *
 * @param array  $items_missing Titles currently considered missing.
 * @param string $cat2          Secondary category name.
 * @param mixed  $depth         Depth value forwarded to get_mdwiki_cat_members().
 * @return array Entries of $items_missing that are also members of $cat2.
 */
function filter_items_missing_cat2($items_missing, $cat2, $depth)
{
    $still_missing = array_intersect(
        $items_missing,
        get_mdwiki_cat_members($cat2, $depth, true)
    );

    test_print("Items missing after intersecting with cat2: " . count($still_missing));

    return $still_missing;
}
112+
113+
/**
 * Compose the one-line HTML summary shown above the results.
 *
 * The total is the sum of the existing count, the number of missing titles
 * and the number of in-process entries. The category link comes from
 * make_mdwiki_cat_url($cat, "Category").
 *
 * @param string $code                Language code; used for the wikipedia link and its label.
 * @param string $cat                 Category passed to make_mdwiki_cat_url().
 * @param array  $inprocess           In-process entries (only counted).
 * @param array  $missing             Missing titles (only counted).
 * @param int    $len_of_exists_pages Number of existing pages.
 * @return string HTML fragment with the totals.
 */
function create_summary($code, $cat, $inprocess, $missing, $len_of_exists_pages)
{
    // Counts that feed the message
    $len_inprocess = count($inprocess);
    $len_of_missing_pages = count($missing);
    $len_of_all = $len_of_exists_pages + $len_of_missing_pages + $len_inprocess;

    // Link to the category
    $caturl = make_mdwiki_cat_url($cat, "Category");

    return "Found $len_of_all pages in $caturl, $len_of_exists_pages exists, and $len_of_missing_pages missing in (<a href='https://$code.wikipedia.org' target='_blank'>$code</a>), $len_inprocess In process.";
}

src/backend/results/sparql_bots/fetch_cat_data_sparql.php

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
use function Results\GetCats\get_mdwiki_cat_members;
1313
use function TD\Render\TestPrint\test_print;
14-
use function Results\ResultsHelps\get_lang_exists_pages;
14+
use function Results\ResultsHelps\get_lang_exists_pages_from_cache;
1515
use function Results\SparqlBot\filter_existing_out;
1616

1717
function get_cat_exists_and_missing($cat, $depth, $code, $use_cache = true): array
@@ -20,32 +20,26 @@ function get_cat_exists_and_missing($cat, $depth, $code, $use_cache = true): arr
2020
// $members = get_mdwiki_cat_members($cat, $use_cache = $use_cache, $depth = $depth, $camp = $camp);
2121
$members = get_mdwiki_cat_members($cat, $depth, $use_cache);
2222

23-
test_print("get_cat_exists_and_missing Members size: " . count($members));
23+
test_print("get_cat_exists and_missing Members size: " . count($members));
2424

25-
$exists = get_lang_exists_pages($code);
25+
$exists = get_lang_exists_pages_from_cache($code);
2626
// ---
2727
// pages that exist in $exists and $members
2828
$exists = array_intersect($members, $exists);
2929
// ---
30-
$exists = array_values($exists);
31-
// ---
32-
// var_dump($exists);
30+
// change from ("{"6":"Video:Cancer"}") to (["Video:Cancer"])
31+
// $exists = array_values($exists);
3332
// ---
3433
// Find missing members
3534
$missing = array_diff($members, $exists);
3635

3736
$missing = array_unique($missing);
3837
// ---
39-
$missing = filter_existing_out($missing, $code);
38+
[$exists, $missing] = filter_existing_out($missing, $exists, $code);
4039
// ---
41-
42-
// Calculate the length of existing pages
43-
$exs_len = count($members) - count($missing);
44-
45-
// test_print("End of get_cat_exists_and_missing <br>===============================");
40+
// test_print("End of get_cat exists_and_missing <br>===============================");
4641

4742
return [
48-
"len_of_exists" => $exs_len,
4943
"missing" => $missing,
5044
"exists" => $exists,
5145
];

src/backend/results/sparql_bots/sparql_bot.php

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ function sparql_query_result($with_qids, $code): array
6060
$result = get_query_result($sparql);
6161
//---
6262
foreach ($result as $item) {
63-
$table[] = $item['item'];
63+
$table[$item['item']] = $item['article'];
6464
}
6565
}
6666
//---
@@ -82,9 +82,13 @@ function get_sparql_data_exists($with_qids, $code): array
8282
//---
8383
$qids_exists = sparql_query_result($with_qids, $code);
8484
//---
85+
$qids_exists_keys = array_keys($qids_exists);
86+
//---
8587
foreach ($with_qids as $title => $qid) {
86-
if (in_array($qid, $qids_exists)) {
87-
$EXISTS[] = $title;
88+
if (in_array($qid, $qids_exists_keys)) {
89+
// $EXISTS[] = $title;
90+
$article = $qids_exists[$qid];
91+
$EXISTS[$title] = $article;
8892
}
8993
}
9094
//---
@@ -120,7 +124,7 @@ function get_qids($list)
120124
];
121125
}
122126

123-
function filter_existing_out($missing, $code): array
127+
function filter_existing_out($missing, $exists, $code): array
124128
{
125129
//---
126130
$missing2 = [];
@@ -140,14 +144,18 @@ function filter_existing_out($missing, $code): array
140144
print_r_it($sparql_exists, 'sparql_exists', $r = 1);
141145
// ---
142146
if (count($sparql_exists) == 0) {
143-
return $missing;
147+
return [$exists, $missing];
144148
}
145149
//---
146150
$new_missings = [];
147151
//---
152+
$sparql_exists_keys = array_keys($sparql_exists);
153+
//---
148154
// Filter out titles that exist in $sparql_exists from $missing
149155
foreach ($missing as $title) {
150-
if (!in_array($title, $sparql_exists)) {
156+
if (in_array($title, $sparql_exists_keys)) {
157+
$exists[] = $title;
158+
} else {
151159
$new_missings[] = $title;
152160
}
153161
};
@@ -156,5 +164,5 @@ function filter_existing_out($missing, $code): array
156164
// ---
157165
test_print("filter_existing_out sparql_exists count: " . count($sparql_exists));
158166
// ---
159-
return $new_missings;
167+
return [$exists, $new_missings];
160168
}

0 commit comments

Comments
 (0)