Vector Search Demo Commands

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
COMMANDS
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

===================================================
Sample Table Creation With Vector Datatype:
===================================================

CREATE TABLE house_for_sale (house_id NUMBER,
                             price NUMBER,
                             city VARCHAR2(400),
                             house_photo BLOB,
                             house_vector VECTOR);
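
For illustration, a row could be inserted with an explicit vector literal as follows (a minimal sketch; the 3-dimensional vector value is made up, and real embeddings typically have hundreds of dimensions):

INSERT INTO house_for_sale (house_id, price, city, house_photo, house_vector)
  VALUES (1, 550000, 'Austin', EMPTY_BLOB(), TO_VECTOR('[1.1, 2.5, 3.0]'));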

===================================================================================
DEMO TO ILLUSTRATE THE WORKFLOW OF VECTOR SEARCH ARCHITECTURE USING PDF DOCUMENTS
===================================================================================

1. Create Tablespace, DB User and Grant privileges
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

$ sqlplus sys/<passwd>@<pdb_service_name> as sysdba

SQL> CREATE TABLESPACE tbs1 DATAFILE 'tbs5.dbf' SIZE 5G AUTOEXTEND ON
     EXTENT MANAGEMENT LOCAL SEGMENT SPACE MANAGEMENT AUTO;

SQL> drop user vector cascade;

SQL> create user vector identified by vector DEFAULT TABLESPACE tbs1 quota unlimited on tbs1;
SQL> grant DB_DEVELOPER_ROLE to vector;

SQL> create or replace directory VEC_DUMP as '/tmp/my_local_dir/';
SQL> grant read, write on directory vec_dump to vector;

2. Load your embedding model into the Oracle Database
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Using the DBMS_VECTOR package, load your embedding model into the Oracle Database.
You must specify the directory where you stored your model in ONNX format as well
as describe what type of model it is and how you want to use it.

$ sqlplus vector/<passwd>@<pdb_service_name>

Syntax: exec dbms_vector.drop_onnx_model(model_name => '<user_specified_model_name>', force => true);

Syntax: exec dbms_vector.load_onnx_model(directory => '<db_directory_name>',
        filename => '<onnx_format_embedding_file>',
        model_name => '<user_specified_name_for_this_model>',
        metadata => '<A_JSON_description_of_the_metadata_describing_the_model>');

Note:
At minimum, the JSON metadata must describe the machine learning 'function'
supported by the model.
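
For example, the smallest metadata document that satisfies this just names the function; the fuller form used in the commands below additionally maps the model's output and input tensor names (a sketch, not the only valid shape):

{"function" : "embedding"}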

SQL> exec dbms_vector.drop_onnx_model(model_name => 'doc_model', force => true);

SQL> exec dbms_vector.load_onnx_model('VEC_DUMP', 'my_embedding_model.onnx',
     'doc_model', JSON('{"function" : "embedding", "embeddingOutput" : "embedding",
     "input": {"input": ["DATA"]}}'));
or
SQL> exec dbms_data_mining.import_onnx_model('my_embedding_model.onnx',
     'doc_model', JSON('{"function" : "embedding", "embeddingOutput" : "embedding",
     "input": {"input": ["DATA"]}}'));

3. Create a relational table to store books in PDF format
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

SQL> drop table documentation_tab purge;

SQL> create table documentation_tab (id number, data blob);
SQL> insert into documentation_tab values(1, to_blob(bfilename('VEC_DUMP',
     'database-concepts23ai.pdf')));
SQL> insert into documentation_tab values(2, to_blob(bfilename('VEC_DUMP',
     'oracle-ai-vector-search-users-guide.pdf')));
SQL> commit;
SQL> select dbms_lob.getlength(data) from documentation_tab;

4. Create a relational table to store unstructured data chunks and associated vector embeddings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

SQL> drop table doc_chunks purge;

SQL> create table doc_chunks (doc_id number, chunk_id number,
     chunk_data varchar2(4000), chunk_embedding vector);

SQL>
insert into doc_chunks
select dt.id doc_id, et.embed_id chunk_id, et.embed_data chunk_data,
to_vector(et.embed_vector) chunk_embedding
from
documentation_tab dt,
dbms_vector_chain.utl_to_embeddings(
dbms_vector_chain.utl_to_chunks(dbms_vector_chain.utl_to_text(dt.data),
json('{"normalize":"all"}')),
json('{"provider":"database", "model":"doc_model"}')) t,
JSON_TABLE(t.column_value, '$[*]' COLUMNS (embed_id NUMBER PATH '$.embed_id',
embed_data VARCHAR2(4000) PATH '$.embed_data', embed_vector CLOB PATH
'$.embed_vector')) et;

SQL> commit;

Syntax for DBMS_VECTOR_CHAIN.UTL_TO_EMBEDDINGS -- converts data to one or more vector embeddings
------------------------------------------------
DBMS_VECTOR_CHAIN.UTL_TO_EMBEDDINGS (
DATA IN VECTOR_ARRAY_T,
PARAMS IN JSON default NULL
) return VECTOR_ARRAY_T;

Syntax for DBMS_VECTOR_CHAIN.UTL_TO_CHUNKS -- splits data into smaller pieces or chunks
-------------------------------------------
DBMS_VECTOR_CHAIN.UTL_TO_CHUNKS (
DATA IN CLOB | VARCHAR2,
PARAMS IN JSON default NULL
) return VECTOR_ARRAY_T;

Syntax for DBMS_VECTOR_CHAIN.UTL_TO_TEXT -- extracts plain text data from documents
-----------------------------------------
DBMS_VECTOR_CHAIN.UTL_TO_TEXT (
DATA IN CLOB | BLOB,
PARAMS IN JSON default NULL
) return CLOB;
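
Taken together, these three functions form a pipeline from a binary document to vector embeddings. The following is a minimal standalone sketch of that chaining, assuming the documentation_tab row with id 1 and the doc_model from step 2 are in place (the chunking parameters are illustrative):

SELECT t.column_value
FROM documentation_tab dt,
     dbms_vector_chain.utl_to_embeddings(                  -- chunks -> embeddings
       dbms_vector_chain.utl_to_chunks(                    -- plain text -> chunks
         dbms_vector_chain.utl_to_text(dt.data),           -- PDF BLOB -> plain-text CLOB
         json('{"max":"100", "overlap":"10"}')),
       json('{"provider":"database", "model":"doc_model"}')) t
WHERE dt.id = 1;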

5. Generate a query vector for use in a similarity search
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For a similarity search, you need a query vector. Here you enter your query text and generate an associated vector embedding.

SQL> ACCEPT text_input CHAR PROMPT 'Enter text: '

Type: "different methods of backup and recovery"

SQL> VARIABLE text_variable VARCHAR2(1000)
SQL> VARIABLE query_vector VECTOR
SQL>
BEGIN
:text_variable := '&text_input';
SELECT vector_embedding(doc_model using :text_variable as data)
into :query_vector;
END;
/

PRINT query_vector

6. Run a similarity search to find, within your books, the four most relevant chunks that talk about backup and recovery
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Using the generated query vector, you search for similar chunks in the DOC_CHUNKS table. For this, you use the VECTOR_DISTANCE SQL function and the FETCH SQL clause to retrieve the most similar chunks.

SELECT doc_id, chunk_id, chunk_data
FROM doc_chunks
ORDER BY vector_distance(chunk_embedding, :query_vector, COSINE)
FETCH FIRST 4 ROWS ONLY;

SELECT doc_id, chunk_id, chunk_data
FROM doc_chunks
WHERE doc_id=1
ORDER BY vector_distance(chunk_embedding, :query_vector, COSINE)
FETCH FIRST 4 ROWS ONLY;

EXPLAIN PLAN FOR
SELECT doc_id, chunk_id, chunk_data
FROM doc_chunks
ORDER BY vector_distance(chunk_embedding, :query_vector, COSINE)
FETCH FIRST 4 ROWS ONLY;

select plan_table_output from table(dbms_xplan.display('plan_table',null,'all'));

7. Run a multi-vector similarity search to find, within your books, the four most relevant chunks in the two most relevant books
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

SELECT doc_id, chunk_id, chunk_data
FROM doc_chunks
ORDER BY vector_distance(chunk_embedding, :query_vector, COSINE)
FETCH FIRST 2 PARTITIONS BY doc_id, 4 ROWS ONLY;

8. Create an In-Memory Neighbor Graph Vector Index on the vector embeddings that you created
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When dealing with huge vector embedding spaces, you may want to create vector indexes to accelerate your similarity searches. Instead of scanning each and every vector embedding in your table, a vector index uses heuristics to reduce the search space and accelerate the similarity search. This is called approximate similarity search.

create vector index docs_hnsw_idx on doc_chunks(chunk_embedding)
organization inmemory neighbor graph
distance COSINE
with target accuracy 95;

SQL> SELECT INDEX_NAME, INDEX_TYPE, INDEX_SUBTYPE FROM USER_INDEXES;

INDEX_NAME      INDEX_TYPE  INDEX_SUBTYPE
--------------  ----------  -----------------------------
DOCS_HNSW_IDX   VECTOR      INMEMORY_NEIGHBOR_GRAPH_HNSW

SELECT JSON_SERIALIZE(IDX_PARAMS returning varchar2 PRETTY)
FROM VECSYS.VECTOR$INDEX where IDX_NAME = 'DOCS_HNSW_IDX';

JSON_SERIALIZE(IDX_PARAMSRETURNINGVARCHAR2PRETTY)
________________________________________________________________
{
"type" : "HNSW",
"num_neighbors" : 32,
"efConstruction" : 300,
"distance" : "COSINE",
"accuracy" : 95,
"vector_type" : "FLOAT32",
"vector_dimension" : 384,
"degree_of_parallelism" : 1,
"pdb_id" : 3,
"indexed_col" : "CHUNK_EMBEDDING"
}

9. Determine the memory allocation in the vector memory area
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

To get an idea of the size of your In-Memory Neighbor Graph Vector Index in memory, you can use the V$VECTOR_MEMORY_POOL view. See "Size the Vector Pool" for more information about sizing the vector pool to allow for vector index creation and maintenance.

SQL> select CON_ID, POOL, ALLOC_BYTES/1024/1024 as ALLOC_BYTES_MB,
     USED_BYTES/1024/1024 as USED_BYTES_MB
     from V$VECTOR_MEMORY_POOL order by 1, 2;

10. Run an approximate similarity search to identify, within your books, the four most relevant chunks (Vector Index Search)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Use the VECTOR_DISTANCE function and the FETCH APPROX SQL clause to retrieve the most similar chunks using your vector index.

SELECT doc_id, chunk_id, chunk_data
FROM doc_chunks
ORDER BY vector_distance(chunk_embedding, :query_vector, COSINE)
FETCH APPROX FIRST 4 ROWS ONLY WITH TARGET ACCURACY 80;

SELECT doc_id, chunk_id, chunk_data
FROM doc_chunks
WHERE doc_id=1
ORDER BY vector_distance(chunk_embedding, :query_vector, COSINE)
FETCH APPROX FIRST 4 ROWS ONLY WITH TARGET ACCURACY 80;

EXPLAIN PLAN FOR
SELECT doc_id, chunk_id, chunk_data
FROM doc_chunks
ORDER BY vector_distance(chunk_embedding, :query_vector, COSINE)
FETCH APPROX FIRST 4 ROWS ONLY WITH TARGET ACCURACY 80;

select plan_table_output from table(dbms_xplan.display('plan_table',null,'all'));

11. Determine your vector index performance for your approximate similarity searches
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
After a vector index is created, you may want to know how accurate your approximate vector searches are. The index accuracy reporting feature allows you to determine the accuracy of your vector indexes.

The DBMS_VECTOR.INDEX_ACCURACY_QUERY function provides an accuracy report for a top-K index search for a specific query vector and a specific target accuracy.

SQL>
SET SERVEROUTPUT ON
declare
report varchar2(128);
begin
report := dbms_vector.index_accuracy_query(
OWNER_NAME => 'VECTOR',
INDEX_NAME => 'DOCS_HNSW_IDX',
qv => :query_vector,
top_K => 10,
target_accuracy => 90 );
dbms_output.put_line(report);
end;
/

===================================================================================
A comprehensive example of importing a pretrained ONNX embedding model and generating vector embeddings
===================================================================================

$ sqlplus sys/<password>@<pdb_service_name> as sysdba

SQL> grant db_developer_role to dmuser identified by dmuser;
SQL> grant create mining model to dmuser;
SQL> create or replace directory DM_DUMP as '<work directory path>';
SQL> grant read on directory dm_dump to dmuser;
SQL> grant write on directory dm_dump to dmuser;

SQL> conn dmuser/<password>@<pdbname>;

SQL> -- Drop the model if it exists
SQL> exec DBMS_VECTOR.DROP_ONNX_MODEL(model_name => 'doc_model', force => true);
SQL> -- Load the model
SQL> EXECUTE DBMS_VECTOR.LOAD_ONNX_MODEL('DM_DUMP', 'my_embedding_model.onnx',
     'doc_model', JSON('{"function" : "embedding", "embeddingOutput" : "embedding"}'));
SQL> -- Check the attributes view
SQL>
SELECT model_name, attribute_name, attribute_type, data_type, vector_info
FROM user_mining_model_attributes
WHERE model_name = 'DOC_MODEL'
ORDER BY ATTRIBUTE_NAME;
SQL>
SELECT MODEL_NAME, MINING_FUNCTION, ALGORITHM,
ALGORITHM_TYPE, MODEL_SIZE
FROM user_mining_models
WHERE model_name = 'DOC_MODEL'
ORDER BY MODEL_NAME;

SQL> select * from DM$VMDOC_MODEL ORDER BY NAME;
SQL> select * from DM$VPDOC_MODEL ORDER BY NAME;
SQL> select * from DM$VJDOC_MODEL;
SQL> -- Apply the model
SQL> SELECT TO_VECTOR(VECTOR_EMBEDDING(doc_model USING 'hello' as data)) AS embedding;

Instead of the DBMS_VECTOR package, you can also use the DBMS_DATA_MINING package to import the pretrained ONNX embedding model. Use the DBMS_DATA_MINING.IMPORT_ONNX_MODEL procedure to import the model and declare the input name. The following code gives an example:

CONN dmuser/<password>@<pdbname>;
DECLARE
m_blob BLOB default empty_blob();
m_src_loc BFILE ;
BEGIN
DBMS_LOB.createtemporary (m_blob, FALSE);
m_src_loc := BFILENAME('DM_DUMP', 'my_embedding_model.onnx');
DBMS_LOB.fileopen (m_src_loc, DBMS_LOB.file_readonly);
DBMS_LOB.loadfromfile (m_blob, m_src_loc, DBMS_LOB.getlength (m_src_loc));
DBMS_LOB.CLOSE(m_src_loc);
DBMS_DATA_MINING.import_onnx_model ('doc_model', m_blob, JSON('{"function" :
"embedding", "embeddingOutput" : "embedding", "input": {"input": ["DATA"]}}'));
DBMS_LOB.freetemporary (m_blob);
END;
/

Which document shows the correct order of argument values for DBMS_DATA_MINING.IMPORT_ONNX_MODEL?
https://docs.oracle.com/en/database/oracle/oracle-database/23/vecse/alternate-method-import-onnx-models.html
or
https://docs.oracle.com/en/database/oracle/oracle-database/23/arpls/DBMS_DATA_MINING.html#GUID-17E2EC12-652D-4D2C-85F6-FA0F648105E4

Alternatively, the DBMS_DATA_MINING.IMPORT_ONNX_MODEL procedure can also accept a BLOB argument representing an ONNX file stored in, and loaded from, OCI Object Storage. The following is an example that loads an ONNX model stored in an OCI Object Storage bucket.

DECLARE
model_source BLOB := NULL;
BEGIN
-- get BLOB holding the ONNX model
model_source := DBMS_CLOUD.GET_OBJECT(
credential_name => 'myCredential',
object_uri => 'https://objectstorage.us-phoenix-1.oraclecloud.com/' ||
'n/namespace-string/b/bucketname/o/myONNXmodel.onnx');
DBMS_DATA_MINING.IMPORT_ONNX_MODEL(
'myonnxmodel',
model_source,
JSON('{"function" : "embedding"}')
);
END;
/

===================================================================================
A comprehensive example of importing a pretrained ONNX embedding model using the DBMS_DATA_MINING package
===================================================================================

Instead of the DBMS_VECTOR package, you can also use the DBMS_DATA_MINING package to import the pretrained ONNX embedding model. Use the DBMS_DATA_MINING.IMPORT_ONNX_MODEL procedure to import the model and declare the input name. The following code gives an example:

$ sqlplus sys/<passwd>@<pdb_service_name> as sysdba

SQL> grant db_developer_role to dmuser identified by dmuser;
SQL> grant create mining model to dmuser;
SQL> create or replace directory DM_DUMP as '<work directory path>';
SQL> grant read on directory dm_dump to dmuser;
SQL> grant write on directory dm_dump to dmuser;
SQL> conn dmuser/<password>@<pdbname>;

SQL> -- Drop the model if it exists
SQL> exec DBMS_VECTOR.DROP_ONNX_MODEL(model_name => 'doc_model', force => true);

SQL> -- Load the model
SQL> --> Is the following syntax correct? Here the "my_embedding_model.onnx" value
SQL> --> is passed as the 1st parameter, whereas in the BLOB overload above the
SQL> --> model name is passed first.
EXECUTE DBMS_DATA_MINING.IMPORT_ONNX_MODEL(
'my_embedding_model.onnx',
'doc_model',
JSON('{"function" : "embedding",
"embeddingOutput" : "embedding",
"input": {"input": ["DATA"]}}')
);

SQL> -- Check the attributes view
SQL>
SELECT model_name, attribute_name, attribute_type, data_type, vector_info
FROM user_mining_model_attributes
WHERE model_name = 'DOC_MODEL'
ORDER BY ATTRIBUTE_NAME;

===================================================================================
EASY TO UNDERSTAND EXAMPLE WITHOUT THE PDF DOCUMENT SCENARIO: DIRECT TEXT CONTENT SCENARIO
===================================================================================

conn sys/password@CDB_PDB as sysdba

CREATE TABLESPACE tbs1
DATAFILE 'tbs5.dbf' SIZE 20G AUTOEXTEND ON
EXTENT MANAGEMENT LOCAL
SEGMENT SPACE MANAGEMENT AUTO;

drop user docuser cascade;

create user docuser identified by docuser DEFAULT TABLESPACE tbs1 quota unlimited on tbs1;
grant DB_DEVELOPER_ROLE to docuser;

create or replace directory VEC_DUMP as '/my_local_dir/';
grant read, write on directory VEC_DUMP to docuser;
commit;

conn docuser/password@CDB_PDB;
SET ECHO ON
SET FEEDBACK 1
SET NUMWIDTH 10
SET LINESIZE 80
SET TRIMSPOOL ON
SET TAB OFF
SET PAGESIZE 10000
SET LONG 10000

drop table documentation_tab purge;

create table documentation_tab (id number, text clob);

insert into documentation_tab values (1,
'Analytics empowers business analysts and consumers with modern, AI-powered,
self-service analytics capabilities for data preparation, visualization,
enterprise reporting, augmented analysis, and natural language processing.
Oracle Analytics Cloud is a scalable and secure public cloud service that
provides capabilities to explore and perform collaborative analytics for you,
your workgroup, and your enterprise.
Oracle Analytics Cloud is available on Oracle Cloud Infrastructure Gen 2 in
several regions in North America, EMEA, APAC, and LAD when you subscribe through
Universal Credits. You can subscribe to Professional Edition or Enterprise
Edition.');

insert into documentation_tab values (3,
'Generative AI Data Science is a fully managed and serverless platform for data
science teams to build, train, and manage machine learning models in the Oracle
Cloud Infrastructure.');

insert into documentation_tab values (4,
'Language allows you to perform sophisticated text analysis at scale. Using the
pretrained and custom models, you can process unstructured text to extract
insights without data science expertise. Pretrained models include sentiment
analysis, key phrase extraction, text classification, and named entity
recognition. You can also train custom models for named entity recognition and
text classification with domain specific datasets. Additionally, you can
translate text across numerous languages.');

insert into documentation_tab values (5,
'When you work with Oracle Cloud Infrastructure, one of the first steps is to
set up a virtual cloud network (VCN) for your cloud resources. This topic gives
you an overview of Oracle Cloud Infrastructure Networking components and typical
scenarios for using a VCN. A virtual, private network that you set up in Oracle
data centers. It closely resembles a traditional network, with firewall rules
and specific types of communication gateways that you can choose to use. A VCN
resides in a single Oracle Cloud Infrastructure region and covers one or more
CIDR blocks (IPv4 and IPv6, if enabled). See Allowed VCN Size and Address
Ranges. The terms virtual cloud network, VCN, and cloud network are used
interchangeably in this documentation. For more information, see VCNs and
Subnets.');

insert into documentation_tab values (6,
'NetSuite banking offers several processing options to accurately track your
income. You can record deposits to your bank accounts to capture customer
payments and other monies received in the course of doing business. For a
deposit, you can select payments received for existing transactions, add funds
not related to transaction payments, and record any cash received back from the
bank.');

commit;

EXECUTE dbms_vector.drop_onnx_model(model_name => 'doc_model', force => true);

EXECUTE dbms_vector.load_onnx_model(
'VEC_DUMP',
'my_embedding_model.onnx',
'doc_model',
json('{"function" : "embedding", "embeddingOutput" : "embedding", "input": {"input": ["DATA"]}}')
);

create table doc_chunks as (
SELECT d.id id,
       row_number() over (partition by d.id order by d.id) chunk_id,
       vc.chunk_offset chunk_offset,
       vc.chunk_length chunk_length,
       vc.chunk_text chunk,
       vector_embedding(doc_model using vc.chunk_text as data) vector
FROM documentation_tab d,
     vector_chunks(d.text by words max 100 overlap 10 split RECURSIVELY) vc
);

desc doc_chunks;
set linesize 100
set long 1000
col id for 999
col chunk_id for 99999
col chunk_offset for 99999
col chunk_length for 99999
col chunk for a30
col vector for a100
select id, chunk_id, chunk_offset, chunk_length, chunk from doc_chunks;
select vector from doc_chunks where rownum <= 1;

create vector index vidx on doc_chunks (vector)
organization neighbor partitions
with target accuracy 95
distance EUCLIDEAN parameters (
type IVF,
neighbor partitions 2);

select id, vector_distance(
       vector,
       vector_embedding(doc_model using 'machine learning models' as data),
       EUCLIDEAN) results
FROM doc_chunks order by results;

select id, vector_distance(
       vector,
       vector_embedding(doc_model using 'gen ai' as data),
       EUCLIDEAN) results
FROM doc_chunks order by results;

select id, vector_distance(
       vector,
       vector_embedding(doc_model using 'computing networks' as data),
       MANHATTAN) results
FROM doc_chunks order by results;

select id, vector_distance(
       vector,
       vector_embedding(doc_model using 'banking, money' as data),
       MANHATTAN) results
FROM doc_chunks order by results;

===================================================================================
SQL RAG EXAMPLE
===================================================================================

This scenario allows you to run a similarity search for specific documentation content based on a user query. Once documentation chunks are retrieved, they are concatenated and a prompt is generated to ask an LLM to answer the user's question using the retrieved chunks.
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
conn sys/password AS sysdba

SET SERVEROUTPUT ON;
SET ECHO ON;
SET LONG 100000;

DROP USER vector cascade;

CREATE USER vector identified by <my vector password>;
GRANT DB_DEVELOPER_ROLE, CREATE CREDENTIAL TO vector;

EXEC UTL_HTTP.SET_PROXY('<my proxy full name>:<my proxy port>');

BEGIN
DBMS_NETWORK_ACL_ADMIN.APPEND_HOST_ACE(
host => '*',
ace => xs$ace_type(privilege_list => xs$name_list('connect'),
principal_name => 'VECTOR',
principal_type => xs_acl.ptype_db));
END;
/

conn vector/password;

-- Create a credential for Oracle Cloud Infrastructure Generative AI

BEGIN
DBMS_VECTOR_CHAIN.DROP_CREDENTIAL(credential_name => 'OCI_CRED');
EXCEPTION
WHEN OTHERS THEN NULL;
END;
/

DECLARE
jo json_object_t;
BEGIN
jo := json_object_t();
jo.put('user_ocid', '<user ocid>');
jo.put('tenancy_ocid', '<tenancy ocid>');
jo.put('compartment_ocid', '<compartment ocid>');
jo.put('private_key', '<private key>');
jo.put('fingerprint', '<fingerprint>');
DBMS_OUTPUT.PUT_LINE(jo.to_string);
DBMS_VECTOR_CHAIN.CREATE_CREDENTIAL(
credential_name => 'OCI_CRED',
params => json(jo.to_string));
END;
/

col owner format a15
col credential_name format a20
col username format a20

SELECT owner, credential_name, username
FROM all_credentials
ORDER BY owner, credential_name, username;

SET SERVEROUTPUT ON;

VAR prompt CLOB;
VAR user_question CLOB;
VAR context CLOB;
BEGIN
-- initialize the concatenated string
:context := '';
-- read this question from the user
:user_question := 'what are vector indexes?';
-- cursor to fetch chunks relevant to the user's query
FOR rec IN (SELECT embed_data
            FROM doc_chunks  -- assuming a chunk table with embed_data/embed_vector columns
            WHERE doc_id = 'Vector User Guide'
            ORDER BY vector_distance(embed_vector, vector_embedding(
              doc_model using :user_question as data), COSINE)
            FETCH EXACT FIRST 10 ROWS ONLY)
LOOP
-- concatenate each value to the string
:context := :context || rec.embed_data;
END LOOP;
-- concatenate strings and format it as an enhanced prompt to the LLM
:prompt := 'Answer the following question using the supplied context
assuming you are a subject matter expert. Question: '
|| :user_question || ' Context: ' || :context;
DBMS_OUTPUT.PUT_LINE('Generated prompt: ' || :prompt);
END;
/

DECLARE
input CLOB;
params CLOB;
output CLOB;
BEGIN
input := :prompt;
params := '{
"provider" : "ocigenai",
"credential_name" : "OCI_CRED",
"url" : "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/generateText",
"model" : "cohere.command"
}';
output := DBMS_VECTOR_CHAIN.UTL_TO_GENERATE_TEXT(input, json(params));
DBMS_OUTPUT.PUT_LINE(output);
IF output IS NOT NULL THEN
DBMS_LOB.FREETEMPORARY(output);
END IF;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE(SQLERRM);
DBMS_OUTPUT.PUT_LINE(SQLCODE);
END;
/
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
