Skip to content

Commit c2dc080

Browse files
committed
support partial match
1 parent ff4b5c5 commit c2dc080

File tree

11 files changed

+503
-240
lines changed

11 files changed

+503
-240
lines changed

expected/rum.out

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,3 +287,66 @@ INSERT INTO tst SELECT i%10, to_tsvector('simple', substr(md5(i::text), 1, 1)) F
287287
DELETE FROM tst WHERE i = 5;
288288
VACUUM tst;
289289
INSERT INTO tst SELECT i%10, to_tsvector('simple', substr(md5(i::text), 1, 1)) FROM generate_series(14001,15000) i;
290+
set enable_bitmapscan=off;
291+
explain (costs off)
292+
SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), *
293+
FROM test_rum
294+
WHERE a @@ to_tsquery('pg_catalog.english', 'w:*')
295+
ORDER BY a <=> to_tsquery('pg_catalog.english', 'w:*');
296+
QUERY PLAN
297+
-----------------------------------------
298+
Index Scan using rumidx on test_rum
299+
Index Cond: (a @@ '''w'':*'::tsquery)
300+
Order By: (a <=> '''w'':*'::tsquery)
301+
(3 rows)
302+
303+
SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), *
304+
FROM test_rum
305+
WHERE a @@ to_tsquery('pg_catalog.english', 'w:*')
306+
ORDER BY a <=> to_tsquery('pg_catalog.english', 'w:*');
307+
?column? | t | a
308+
----------+--------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------
309+
8.22467 | so well that only a fragment, as it were, gave way. It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2
310+
8.22467 | wine, but wouldn’t you divide with your neighbors! The columns in the | 'column':11 'divid':6 'neighbor':9 'wine':1 'wouldn':3
311+
8.22467 | not say, but you wrote as if you knew it by sight as well as by heart. | 'heart':17 'knew':9 'say':2 'sight':12 'well':14 'wrote':5
312+
16.4493 | little series of pictures. Have you ever been here, I wonder? You did | 'ever':7 'littl':1 'pictur':4 'seri':2 'wonder':11
313+
16.4493 | itself. Put on your “specs” and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12
314+
16.4493 | _berg_, “the Jettenhuhl, a wooded spur of the Konigestuhl.” Look at it | 'berg':1 'jettenhuhl':3 'konigestuhl':9 'look':10 'spur':6 'wood':5
315+
16.4493 | thickness of the walls, twenty-one feet, and the solid masonry, held it | 'feet':8 'held':13 'masonri':12 'one':7 'solid':11 'thick':1 'twenti':6 'twenty-on':5 'wall':4
316+
16.4493 | ornamental building, and I wish you could see it, if you have not seen | 'build':2 'could':7 'ornament':1 'see':8 'seen':14 'wish':5
317+
16.4493 | thinking--“to go or not to go?” We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14
318+
16.4493 | curious spectacle, but on the whole had “the banquet-hall deserted” | 'banquet':10 'banquet-hal':9 'curious':1 'desert':12 'hall':11 'spectacl':2 'whole':6
319+
16.4493 | As a reward for your reformation I write to you on this precious sheet. | 'precious':13 'reform':6 'reward':3 'sheet':14 'write':8
320+
16.4493 | entrance of the Black Forest, among picturesque, thickly-wooded hills, | 'among':6 'black':4 'entranc':1 'forest':5 'hill':11 'picturesqu':7 'thick':9 'thickly-wood':8 'wood':10
321+
16.4493 | You see I have come to be wonderfully attached to Heidelberg, the | 'attach':9 'come':5 'heidelberg':11 'see':2 'wonder':8
322+
16.4493 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9
323+
(14 rows)
324+
325+
SELECT a <=> to_tsquery('pg_catalog.english', 'b:*'), *
326+
FROM test_rum
327+
WHERE a @@ to_tsquery('pg_catalog.english', 'b:*')
328+
ORDER BY a <=> to_tsquery('pg_catalog.english', 'b:*');
329+
?column? | t | a
330+
----------+--------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------
331+
8.22467 | been trying my best to get all those “passes” into my brain. Now, thanks | 'best':4 'brain':12 'get':6 'pass':9 'thank':14 'tri':2
332+
8.22467 | All the above information, I beg you to believe, I do not intend you | 'beg':6 'believ':9 'inform':4 'intend':13
333+
8.22467 | curious spectacle, but on the whole had “the banquet-hall deserted” | 'banquet':10 'banquet-hal':9 'curious':1 'desert':12 'hall':11 'spectacl':2 'whole':6
334+
8.22467 | oaks, limes and maples, bordered with flower-beds and shrubberies, and | 'bed':9 'border':5 'flower':8 'flower-b':7 'lime':2 'mapl':4 'oak':1 'shrubberi':11
335+
13.1595 | foo bar foo the over foo qq bar | 'bar':2,8 'foo':1,3,6 'qq':7
336+
16.4493 | ornamental building, and I wish you could see it, if you have not seen | 'build':2 'could':7 'ornament':1 'see':8 'seen':14 'wish':5
337+
16.4493 | the--nearest guide-book! | 'book':5 'guid':4 'guide-book':3 'nearest':2
338+
16.4493 | to your letter, I have them all in the handiest kind of a bunch. Ariel | 'ariel':15 'bunch':14 'handiest':10 'kind':11 'letter':3
339+
16.4493 | beautiful, the quaint, the historically poetic, learned and picturesque | 'beauti':1 'histor':5 'learn':7 'picturesqu':9 'poetic':6 'quaint':3
340+
16.4493 | there are dreadful reports of floods and roads caved in and bridges | 'bridg':12 'cave':9 'dread':3 'flood':6 'report':4 'road':8
341+
16.4493 | the Conversationhaus, the bazaar, mingling with the throng, listening to | 'bazaar':4 'conversationhaus':2 'listen':9 'mingl':5 'throng':8
342+
16.4493 | the band, and comparing what it is with what it was. It was a gay and | 'band':2 'compar':4 'gay':15
343+
16.4493 | look. The situation is most beautiful. It lies, you know, at the | 'beauti':6 'know':10 'lie':8 'look':1 'situat':3
344+
16.4493 | entrance of the Black Forest, among picturesque, thickly-wooded hills, | 'among':6 'black':4 'entranc':1 'forest':5 'hill':11 'picturesqu':7 'thick':9 'thickly-wood':8 'wood':10
345+
16.4493 | town with angry, headlong speed. There is an avenue along its bank of | 'along':10 'angri':3 'avenu':9 'bank':12 'headlong':4 'speed':5 'town':1
346+
16.4493 | like, “I’ll do my bidding gently,” and as surely, if I get there. But | 'bid':6 'gentl':7 'get':13 'like':1 'll':3 'sure':10
347+
16.4493 | _berg_, “the Jettenhuhl, a wooded spur of the Konigestuhl.” Look at it | 'berg':1 'jettenhuhl':3 'konigestuhl':9 'look':10 'spur':6 'wood':5
348+
16.4493 | Gesprente Thurm is the one that was blown up by the French. The | 'blown':8 'french':12 'gesprent':1 'one':5 'thurm':2
349+
16.4493 | portico that shows in the Schlosshof are the four brought from | 'brought':10 'four':9 'portico':1 'schlosshof':6 'show':3
350+
16.4493 | the few that escaped destruction in 1693. It is a beautiful, highly | '1693':7 'beauti':11 'destruct':5 'escap':4 'high':12
351+
(20 rows)
352+

rum--1.0.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ RETURNS float8
7575
AS 'MODULE_PATHNAME'
7676
LANGUAGE C IMMUTABLE STRICT;
7777

78+
-- C-language support function registered as FUNCTION 10 of the rum_tsvector_ops operator class.
CREATE FUNCTION rum_ts_join_pos(internal, internal) -- arguments are declared "internal" to prevent calling from SQL
79+
RETURNS bytea
80+
AS 'MODULE_PATHNAME'
81+
LANGUAGE C IMMUTABLE STRICT;
82+
7883
CREATE OPERATOR CLASS rum_tsvector_ops
7984
FOR TYPE tsvector USING rum
8085
AS
@@ -88,6 +93,7 @@ AS
8893
FUNCTION 6 rum_tsvector_config(internal),
8994
FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
9095
FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal),
96+
FUNCTION 10 rum_ts_join_pos(internal, internal),
9197
STORAGE text;
9298

9399
-- timestamp ops

rum.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -374,13 +374,15 @@ typedef struct RumState
374374
FmgrInfo preConsistentFn[INDEX_MAX_KEYS]; /* optional method */
375375
FmgrInfo orderingFn[INDEX_MAX_KEYS]; /* optional method */
376376
FmgrInfo outerOrderingFn[INDEX_MAX_KEYS]; /* optional method */
377+
FmgrInfo joinAddInfoFn[INDEX_MAX_KEYS]; /* optional method */
377378
/* canPartialMatch[i] is true if comparePartialFn[i] is valid */
378379
bool canPartialMatch[INDEX_MAX_KEYS];
379380
/* canPreConsistent[i] is true if preConsistentFn[i] is valid */
380381
bool canPreConsistent[INDEX_MAX_KEYS];
381382
/* canOrdering[i] is true if orderingFn[i] is valid */
382383
bool canOrdering[INDEX_MAX_KEYS];
383384
bool canOuterOrdering[INDEX_MAX_KEYS];
385+
bool canJoinAddInfo[INDEX_MAX_KEYS];
384386
/* Collations to pass to the support functions */
385387
Oid supportCollation[INDEX_MAX_KEYS];
386388
} RumState;
@@ -599,7 +601,6 @@ typedef struct RumScanKeyData
599601
* key cannot succeed for any later TIDs.
600602
*/
601603
RumKey curItem;
602-
bool hadLossyEntry;
603604
bool curItemMatches;
604605
bool recheckCurItem;
605606
bool isFinished;
@@ -630,9 +631,9 @@ typedef struct RumScanEntryData
630631

631632
/* for a partial-match or full-scan query, we accumulate all TIDs here */
632633
bool forceUseBitmap;
633-
TIDBitmap *matchBitmap;
634-
TBMIterator *matchIterator;
635-
TBMIterateResult *matchResult;
634+
/* or here if we need to store addinfo */
635+
Tuplesortstate *matchSortstate;
636+
RumKey collectRumKey;
636637

637638
/* for full-scan query with order-by */
638639
RumBtreeStack *stack;
@@ -765,7 +766,8 @@ extern RumKey *rumGetBAEntry(BuildAccumulator *accum,
765766
#define RUM_PRE_CONSISTENT_PROC 7
766767
#define RUM_ORDERING_PROC 8
767768
#define RUM_OUTER_ORDERING_PROC 9
768-
#define RUMNProcs 9
769+
#define RUM_ADDINFO_JOIN 10
770+
#define RUMNProcs 10
769771

770772
extern Datum rum_extract_tsvector(PG_FUNCTION_ARGS);
771773
extern Datum rum_extract_tsquery(PG_FUNCTION_ARGS);

rum_ts_utils.c

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ PG_FUNCTION_INFO_V1(rum_tsquery_distance);
3535
PG_FUNCTION_INFO_V1(rum_ts_distance_tt);
3636
PG_FUNCTION_INFO_V1(rum_ts_distance_ttf);
3737
PG_FUNCTION_INFO_V1(rum_ts_distance_td);
38+
PG_FUNCTION_INFO_V1(rum_ts_join_pos);
3839

3940
PG_FUNCTION_INFO_V1(tsquery_to_distance_query);
4041

@@ -1314,3 +1315,76 @@ rum_tsvector_config(PG_FUNCTION_ARGS)
13141315

13151316
PG_RETURN_VOID();
13161317
}
1318+
1319+
Datum
1320+
rum_ts_join_pos(PG_FUNCTION_ARGS)
1321+
{
1322+
Datum addInfo1 = PG_GETARG_DATUM(0);
1323+
Datum addInfo2 = PG_GETARG_DATUM(1);
1324+
char *in1 = VARDATA_ANY(addInfo1),
1325+
*in2 = VARDATA_ANY(addInfo2);
1326+
bytea *result;
1327+
int count1 = count_pos(in1, VARSIZE_ANY_EXHDR(addInfo1)),
1328+
count2 = count_pos(in2, VARSIZE_ANY_EXHDR(addInfo2)),
1329+
countRes = 0,
1330+
i1 = 0, i2 = 0, size;
1331+
uint16 pos1, pos2, *pos;
1332+
1333+
result = palloc(VARHDRSZ + sizeof(uint16) * (count1 + count2));
1334+
pos = palloc(sizeof(uint16) * (count1 + count2));
1335+
1336+
Assert(count1 > 0 && count2 > 0);
1337+
1338+
1339+
in1 = decompress_pos(in1, &pos1);
1340+
in2 = decompress_pos(in2, &pos2);
1341+
1342+
while(i1 < count1 && i2 < count2)
1343+
{
1344+
if (WEP_GETPOS(pos1) > WEP_GETPOS(pos2))
1345+
{
1346+
pos[countRes++] = pos2;
1347+
if (i2 < count2)
1348+
in2 = decompress_pos(in2, &pos2);
1349+
i2++;
1350+
}
1351+
else if (WEP_GETPOS(pos1) < WEP_GETPOS(pos2))
1352+
{
1353+
pos[countRes++] = pos1;
1354+
if (i1 < count1)
1355+
in1 = decompress_pos(in1, &pos1);
1356+
i1++;
1357+
}
1358+
else
1359+
{
1360+
pos[countRes++] = pos1;
1361+
if (i1 < count1)
1362+
in1 = decompress_pos(in1, &pos1);
1363+
if (i2 < count2)
1364+
in2 = decompress_pos(in2, &pos2);
1365+
i1++;
1366+
i2++;
1367+
}
1368+
}
1369+
1370+
while(i1 < count1)
1371+
{
1372+
pos[countRes++] = pos1;
1373+
if (i1 < count1)
1374+
in1 = decompress_pos(in1, &pos1);
1375+
i1++;
1376+
}
1377+
1378+
while(i2 < count2)
1379+
{
1380+
pos[countRes++] = pos2;
1381+
if (i2 < count2)
1382+
in2 = decompress_pos(in2, &pos2);
1383+
i2++;
1384+
}
1385+
1386+
size = compress_pos(result->vl_dat, pos, countRes) + VARHDRSZ;
1387+
SET_VARSIZE(result, size);
1388+
1389+
PG_RETURN_BYTEA_P(result);
1390+
}

0 commit comments

Comments
 (0)