Commit eb339d0f authored by abir.chebbi
add scripts

parent a1fcbfff
## Source: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless-sdk.html
import boto3
import botocore
import time

client = boto3.client('opensearchserverless')
service = 'aoss'
Vector_store_name = 'test1'


def createEncryptionPolicy(client):
    """Creates an encryption policy that matches all collections beginning with test"""
    try:
        response = client.create_security_policy(
            description='Encryption policy for test collections',
            name='test-policy',
            policy="""
                {
                    \"Rules\":[
                        {
                            \"ResourceType\":\"collection\",
                            \"Resource\":[
                                \"collection\/test*\"
                            ]
                        }
                    ],
                    \"AWSOwnedKey\":true
                }
                """,
            type='encryption'
        )
        print('\nEncryption policy created:')
        print(response)
    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == 'ConflictException':
            print(
                '[ConflictException] The policy name or rules conflict with an existing policy.')
        else:
            raise error


def createNetworkPolicy(client):
    """Creates a network policy that matches all collections beginning with test"""
    try:
        response = client.create_security_policy(
            description='Network policy for Test collections',
            name='test-policy',
            policy="""
                [{
                    \"Description\":\"Public access for Test collection\",
                    \"Rules\":[
                        {
                            \"ResourceType\":\"dashboard\",
                            \"Resource\":[\"collection\/test*\"]
                        },
                        {
                            \"ResourceType\":\"collection\",
                            \"Resource\":[\"collection\/test*\"]
                        }
                    ],
                    \"AllowFromPublic\":true
                }]
                """,
            type='network'
        )
        print('\nNetwork policy created:')
        print(response)
    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == 'ConflictException':
            print(
                '[ConflictException] A network policy with this name already exists.')
        else:
            raise error


def createAccessPolicy(client):
    """Creates a data access policy that matches all collections beginning with test"""
    try:
        response = client.create_access_policy(
            description='Data access policy for Test collections',
            name='test-policy',
            policy="""
                [{
                    \"Rules\":[
                        {
                            \"Resource\":[
                                \"index\/test*\/*\"
                            ],
                            \"Permission\":[
                                \"aoss:CreateIndex\",
                                \"aoss:DeleteIndex\",
                                \"aoss:UpdateIndex\",
                                \"aoss:DescribeIndex\",
                                \"aoss:ReadDocument\",
                                \"aoss:WriteDocument\"
                            ],
                            \"ResourceType\": \"index\"
                        },
                        {
                            \"Resource\":[
                                \"collection\/test*\"
                            ],
                            \"Permission\":[
                                \"aoss:CreateCollectionItems\",
                                \"aoss:DeleteCollectionItems\",
                                \"aoss:UpdateCollectionItems\",
                                \"aoss:DescribeCollectionItems\"
                            ],
                            \"ResourceType\": \"collection\"
                        }
                    ],
                    \"Principal\":[
                        \"arn:aws:iam::768034348959:user/AbirChebbi\"
                    ]
                }]
                """,
            type='data'
        )
        print('\nAccess policy created:')
        print(response)
    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == 'ConflictException':
            print(
                '[ConflictException] An access policy with this name already exists.')
        else:
            raise error


def waitForCollectionCreation(client):
    """Waits for the collection to become active"""
    time.sleep(40)
    response = client.batch_get_collection(names=['test1'])
    print('\nCollection successfully created:')
    print(response["collectionDetails"])
    # Extract the collection endpoint from the response
    host = response['collectionDetails'][0]['collectionEndpoint']
    final_host = host.replace("https://", "")
    return final_host


def main():
    createEncryptionPolicy(client)
    createNetworkPolicy(client)
    createAccessPolicy(client)
    collection = client.create_collection(name=Vector_store_name, type='VECTORSEARCH')
    ENDPOINT = waitForCollectionCreation(client)
    print("Collection created successfully:", collection)
    print("Collection ENDPOINT:", ENDPOINT)


if __name__ == "__main__":
    main()
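A hedged alternative to the fixed time.sleep(40) in waitForCollectionCreation above: poll batch_get_collection until the collection reports status ACTIVE before reading its endpoint. This is only a sketch reusing the client and time imports of the script above; waitForCollectionActive is not part of the original script, and the 30-second interval and 20-attempt cap are arbitrary choices.

def waitForCollectionActive(client, name='test1', delay=30, max_attempts=20):
    """Polls until the collection is ACTIVE, then returns its endpoint host (scheme stripped)."""
    for _ in range(max_attempts):
        response = client.batch_get_collection(names=[name])
        details = response['collectionDetails']
        if details and details[0]['status'] == 'ACTIVE':
            # collectionEndpoint is populated once the collection is ACTIVE
            return details[0]['collectionEndpoint'].replace("https://", "")
        print('Collection still creating...')
        time.sleep(delay)
    raise TimeoutError(f"Collection '{name}' did not become ACTIVE in time")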
# Script: empty the S3 bucket, then delete it
import boto3

BUCKET_NAME = 'cloud-lecture-2023'

S3_CLIENT = boto3.client('s3')
S3_RESOURCE = boto3.resource('s3')

# Delete Bucket
# First, delete all objects in the Bucket
bucket = S3_RESOURCE.Bucket(BUCKET_NAME)
print("Deleting all objects in Bucket\n")
bucket.objects.all().delete()

print("Deleting Bucket")
# Bucket Deletion
response = S3_CLIENT.delete_bucket(
    Bucket=BUCKET_NAME
)
print(response)
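delete_bucket only succeeds on an empty bucket, and bucket.objects.all().delete() above removes current objects only. If versioning was ever enabled on the bucket, old versions and delete markers also have to be removed first. A minimal sketch, assuming the same S3_RESOURCE, S3_CLIENT and BUCKET_NAME as above:

# Remove every object version and delete marker (a no-op if versioning was never enabled),
# then delete the now-empty bucket.
versioned_bucket = S3_RESOURCE.Bucket(BUCKET_NAME)
versioned_bucket.object_versions.delete()
S3_CLIENT.delete_bucket(Bucket=BUCKET_NAME)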
# Script: create the S3 bucket and upload the local PDF files to it
import boto3
import os

LOCAL_DIR = "pdfs"
BUCKET_NAME = 'cloud-lecture-2023'

# Initiate S3 client
s3_client = boto3.client('s3')

# Create S3 Bucket
# (no CreateBucketConfiguration is passed, which assumes the default region us-east-1)
print("Creating Bucket")
response = s3_client.create_bucket(
    Bucket=BUCKET_NAME,
)
print(response)
print()


# Function to write files to S3
def write_files(directory, bucket):
    for filename in os.listdir(directory):
        if filename.endswith(".pdf"):  # Check if the file is a PDF
            file_path = os.path.join(directory, filename)
            with open(file_path, 'rb') as file:
                print(f"Uploading {filename} to bucket {bucket}...")
                s3_client.put_object(
                    Body=file,
                    Bucket=bucket,
                    Key=filename
                )
                print(f"{filename} uploaded successfully.")


# Upload PDF files to S3 bucket
print("Writing Items to Bucket")
write_files(LOCAL_DIR, BUCKET_NAME)
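A quick way to confirm the upload, assuming the same s3_client and BUCKET_NAME as above: page through list_objects_v2 and print each key. The paginator only matters if the bucket ends up holding more than 1000 objects.

# List the keys now stored in the bucket to verify the upload
paginator = s3_client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket=BUCKET_NAME):
    for obj in page.get('Contents', []):
        print(obj['Key'], obj['Size'])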
Diff to the existing embeddings/indexing script:

@@ -14,7 +14,7 @@ index_name = "cloud_lecture_test"
 ## S3_client
 s3_client = boto3.client('s3')
 ## Bucket name where documents are stored
-BUCKET_NAME = "chatbotlab"
+BUCKET_NAME = "cloud-lecture-2023"
 ## Bedrock client
 bedrock_client = boto3.client(service_name="bedrock-runtime")
@@ -25,7 +25,7 @@ credentials = boto3.Session().get_credentials()
 awsauth = AWSV4SignerAuth(credentials, 'us-east-1', 'aoss')
 ## Vector DB endpoint
-host= 'd7gvxdj7jpz3h3bj0xq6.us-east-1.aoss.amazonaws.com'
+host= 'ispfynbvy6eov4efdsqd.us-east-1.aoss.amazonaws.com'
 ## Opensearch Client
 OpenSearch_client = OpenSearch(
@@ -91,8 +91,12 @@ def split_text(pages, chunk_size, chunk_overlap, local_dir):
     return chunks
 ## Generate embeddings and index them using Opensearch
-def generate_embeddings(bedrock_client, chunks,awsauth,index_name):
+# def generate_embeddings():
+# def store_embeddings():
+def generate_store_embeddings(bedrock_client, chunks,awsauth,index_name):
     embeddings_model = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1", client=bedrock_client)
     docsearch = OpenSearchVectorSearch.from_documents(
         chunks,
@@ -117,7 +121,7 @@ def main():
     chunks=split_text(docs, 1000, 100, LOCAL_DIR)
     print("Sample chunk:", chunks[0])
     create_index(index_name)
-    embeddings = generate_embeddings(bedrock_client, chunks,awsauth,index_name)
+    embeddings = generate_store_embeddings(bedrock_client, chunks,awsauth,index_name)
     print("Embeddings processing completed", embeddings)
...
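Once generate_store_embeddings has indexed the chunks, the same LangChain classes can query the collection. A minimal sketch, not part of the commit: it assumes the bedrock_client, host, index_name and awsauth values defined earlier in that script, the langchain_community import paths (which vary across LangChain versions), and a placeholder question.

from opensearchpy import RequestsHttpConnection
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.vectorstores import OpenSearchVectorSearch

# Reconnect to the existing index with the same embedding model used for indexing
embeddings_model = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1", client=bedrock_client)
docsearch = OpenSearchVectorSearch(
    opensearch_url=f"https://{host}",
    index_name=index_name,
    embedding_function=embeddings_model,
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)

# Embed the question and return the 3 most similar chunks
results = docsearch.similarity_search("What is Amazon S3?", k=3)  # placeholder query
for doc in results:
    print(doc.page_content[:200])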