Skip to content

Commit 681e97a

Browse files
committed
add Full-Text Search
1 parent 77bf100 commit 681e97a

File tree

5 files changed

+1122
-413
lines changed

5 files changed

+1122
-413
lines changed

DatabaseService/ArchiveDatabase.backup

+1,014-385
Large diffs are not rendered by default.

DatabaseService/Database.cs

+86-23
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using ServiceStack.Text;
88
using static ServiceStack.Text.JsonSerializer;
99
using ServiceStack.OrmLite;
10+
using ServiceStack.OrmLite.PostgreSQL;
1011

1112
namespace DatabaseService
1213
{
@@ -19,33 +20,48 @@ public static class Database
1920
static Database()
2021
{
2122
dbFactory = new OrmLiteConnectionFactory(ConnectionString, PostgreSqlDialect.Provider);
23+
bool empty;
2224
using (var db = dbFactory.Open())
2325
{
24-
if (db.CreateTableIfNotExists<NewsArticle>())
26+
empty = db.CreateTableIfNotExists<NewsArticle>();
27+
if (empty)
2528
{
26-
List<NewsArticle> News = LoadFromFile();
27-
News = News.OrderBy(art => art.Date).ToList();
28-
foreach (NewsArticle article in News)
29-
{
30-
db.Insert(article);
31-
}
32-
}
33-
SaveToFile();
34-
db.ExecuteSql(@"
29+
db.ExecuteSql(@"
3530
DROP TABLE IF EXISTS text_analysis
3631
;");
37-
db.ExecuteSql(@"
32+
db.ExecuteSql(@"
3833
CREATE TABLE text_analysis(
3934
id SERIAL PRIMARY KEY,
40-
article_id SERIAL REFERENCES news_article(id),
35+
article_id SERIAL REFERENCES news_article(id) ON DELETE CASCADE,
4136
tsvec tsvector
4237
);");
38+
}
4339
db.ExecuteSql(@"
44-
INSERT INTO text_analysis (article_id, tsvec)
45-
SELECT id, to_tsvector(text)
46-
FROM news_article
40+
CREATE INDEX IF NOT EXISTS url_index ON news_article (url)
4741
;");
42+
//db.CreateIndex<NewsArticle>(art => art.URL);
43+
NewsArticle egg = new NewsArticle(
44+
"tg: @Rigorich",
45+
"-",
46+
"Author",
47+
"Гришаев Никита Григорьевич",
48+
System.Data.SqlTypes.SqlDateTime.MinValue.Value
49+
);
50+
if (!db.Exists<NewsArticle>(art => art.URL == egg.URL))
51+
{
52+
db.Insert(egg);
53+
}
54+
}
55+
if (empty)
56+
{
57+
List<NewsArticle> News = LoadFromFile();
58+
News = News.OrderBy(art => art.Date).ToList();
59+
foreach (NewsArticle article in News)
60+
{
61+
Add(article);
62+
}
4863
}
64+
SaveToFile();
4965
}
5066
static List<NewsArticle> LoadFromFile()
5167
{
@@ -90,12 +106,11 @@ public static void Add(NewsArticle article)
90106
if (!db.Exists<NewsArticle>(m => m.URL == article.URL && m.Text == article.Text))
91107
{
92108
db.Insert(article);
93-
long articleId = db.LastInsertId();
94109
db.ExecuteSql(@"
95110
INSERT INTO text_analysis (article_id, tsvec)
96-
SELECT id, to_tsvector(text)
111+
SELECT id, setweight(to_tsvector(name), 'A') || setweight(to_tsvector(text), 'D')
97112
FROM news_article
98-
WHERE id=" + articleId + @"
113+
WHERE url='" + article.URL + @"'
99114
;");
100115
}
101116
}
@@ -121,13 +136,61 @@ public static List<NewsArticle> GetFilteredList(ListRequest request)
121136
{
122137
var q = db.From<NewsArticle>();
123138
q = q.Where(art => request.LeftBoundDate <= art.Date && art.Date <= request.RightBoundDate);
124-
q = q.Where(art => art.URL.Contains(request.Url));
125-
//q = q.Where(request.Keywords);
126-
//q = q.Where(request.Entitities);
127-
q = request.OldestFirst ? q.OrderBy(art => art.Date) : q.OrderByDescending(art => art.Date);
139+
q = q.Where(art => art.URL.StartsWith(request.Url));
140+
141+
string keywords = request.Keywords;
142+
if (!keywords.IsNullOrEmpty())
143+
{
144+
keywords = keywords.Replace("\'", "\'\'");
145+
var results = db.Select<(int ArtId, float Rank)>(@"
146+
WITH query_rank AS (
147+
SELECT article_id AS art_id, ts_rank(tsvec, websearch_to_tsquery('" + keywords + @"')) AS ts_rank_value
148+
FROM text_analysis
149+
)
150+
151+
SELECT id, ts_rank_value
152+
FROM query_rank
153+
154+
INNER JOIN news_article
155+
ON query_rank.ts_rank_value > 0
156+
AND query_rank.art_id = news_article.id
157+
158+
ORDER BY ts_rank_value DESC
159+
;")
160+
.Map(tup =>
161+
new SearchRankResult()
162+
{
163+
ArtId = tup.ArtId,
164+
Rank = tup.Rank,
165+
Query = keywords
166+
});
167+
db.CreateTableIfNotExists<SearchRankResult>();
168+
foreach (SearchRankResult result in results)
169+
{
170+
if (!db.Exists<SearchRankResult>(r => r.ArtId == result.ArtId && r.Query == result.Query))
171+
{
172+
db.Insert(result);
173+
}
174+
}
175+
q = q.Join<NewsArticle, SearchRankResult>((art, rnk) => art.Id == rnk.ArtId && rnk.Query == keywords);
176+
q = q.OrderByDescending<NewsArticle, SearchRankResult>((art, rnk) => rnk.Rank);
177+
}
178+
else
179+
{
180+
q = request.OldestFirst ? q.OrderBy(art => art.Date) : q.OrderByDescending(art => art.Date);
181+
}
182+
183+
if (!(request.Entitities is null || request.Entitities.IsEmpty()))
184+
{
185+
q = q.UnsafeWhere(PgSql.Array(request.Entitities) + " <@ entities");
186+
}
128187
q = q.Limit(request.Skip, request.Count);
129-
q = q.Select(art => new { art.URL, art.Name, art.Date });
188+
q = q.Select(art => new { art.Id, art.URL, art.Name, art.Date });
130189
FilteredNews = db.Select(q).ToList();
190+
if (!keywords.IsNullOrEmpty())
191+
{
192+
db.Delete<SearchRankResult>(rnk => rnk.Query == keywords);
193+
}
131194
}
132195
return FilteredNews;
133196
}

DatabaseService/DatabaseRequest.cs

+3-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ public class ListRequest : IReturn<ListResponse>
2121
public string Url { get; set; } = null;
2222
public DateTime LeftBoundDate { get; set; } = DateTime.MinValue;
2323
public DateTime RightBoundDate { get; set; } = DateTime.MaxValue;
24-
public string[] Keywords { get; set; } = null;
24+
25+
public string Keywords { get; set; } = null;
26+
2527
public string[] Entitities { get; set; } = null;
2628

2729
public int Skip { get; set; } = 0;

DatabaseService/SearchRankResult.cs

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
using ServiceStack.DataAnnotations;
2+
3+
namespace DatabaseService
4+
{
5+
class SearchRankResult
6+
{
7+
[AutoIncrement]
8+
public long Id { get; set; }
9+
10+
public int ArtId { get; set; }
11+
public float Rank { get; set; }
12+
public string Query { get; set; }
13+
}
14+
}

TestClient/Program.cs

+5-4
Original file line numberDiff line numberDiff line change
@@ -111,21 +111,22 @@ static void Main(string[] args)
111111
Console.WriteLine("GET FILTER LIST");
112112
string url;
113113
DateTime? ldate, rdate;
114-
string[] keywords, entities;
114+
string keywords;
115+
string[] entities;
115116
Console.WriteLine("Enter left bound date:"); ldate = Console.ReadLine().ToDateTime();
116117
Console.WriteLine("Enter right bound date:"); rdate = Console.ReadLine().ToDateTime();
117118
Console.WriteLine("Enter url:"); url = Console.ReadLine();
118119

119120
static string[] ParseLine(string s)
120121
{
121-
return s.Split(",").Select(w => w.ToLowerInvariant().Trim()).Where(w => !w.IsNullOrEmpty()).ToArray();
122+
return s.Split(",").Select(w => w.Trim()).Where(w => !w.IsNullOrEmpty()).ToArray();
122123
}
123-
Console.WriteLine("Enter keywords separated by comma:"); keywords = ParseLine(Console.ReadLine());
124+
Console.WriteLine("Enter keywords query:"); keywords = Console.ReadLine();
124125
Console.WriteLine("Enter entities separated by comma:"); entities = ParseLine(Console.ReadLine());
125126

126127
Console.WriteLine($"URL = {url}");
127128
Console.WriteLine($"Dates = {ldate} - {rdate}");
128-
Console.WriteLine($"Keywords = {keywords.Join(", ")}");
129+
Console.WriteLine($"Keywords = {keywords}");
129130
Console.WriteLine($"Entities = {entities.Join(", ")}");
130131

131132
Console.WriteLine("OK? (Press Enter)");

0 commit comments

Comments
 (0)