Developer Advocate @ Ploomber (sharing knowledge about tools to improve the data science workflow)
Previously a data scientist (for-profit and not-for-profit sectors)
Deeply curious about generative AI and Large Language Models, with a focus on engineering and automation
I use LLMs, prompting and agents to automate work tasks
ChatCompletion
Text summarization
Conversation
Translation
Text generation
Text, token and sentiment classification
Table Q&A and Q&A from unstructured data
Sentence similarity
Masking
$\Rightarrow$ Who will benefit from your product?
$\Rightarrow$ What are business constraints (time, data, resources)?
$\Rightarrow$ What is the end result?
$\Rightarrow$ How will it be served?
Source: Coursera, Generative AI with LLMs
Transformer type | Architecture | Model-like | Focus | Example |
---|---|---|---|---|
Auto-regressive | Decoder-only | GPT-like | Generative tasks | Chat bot |
Auto-encoding | Encoder-only | BERT-like | Understanding of the input | Question-answering |
Sequence-to-Sequence | Encoder-decoder | BART/T5-like | Generative tasks that require an input | Language translation |
“Attention is all you need” (Vaswani et al., 2017)
No. Training an LLM is costly (GPU usage, time, compute, data). This is why sharing LLMs and their fine-tuned components has become highly popular.
You can start with prompting an LLM, then fine-tuning* or using retrieval augmentation if you aren't getting the results you want. You'll need to curate a dataset for this.
*(instruction tuning or PEFT + LoRA, for example)
Source: https://huggingface.co/tasks
Zero-shot inference
One-shot inference
Few-shot inference
Roles (OpenAI API)
We're going to focus on the ChatCompletion endpoint.
Key elements: the OpenAI API key, model (GPT-4, GPT-3.5 Turbo, text-davinci), temperature, prompt, max tokens
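A minimal sketch of a request that brings these elements together, using the openai Python package's pre-1.0 ChatCompletion interface (the environment variable name, prompt, and parameter values are illustrative):

import os
import openai

# Authenticate (assumes the key is stored in an environment variable)
openai.api_key = os.environ["OPENAI_API_KEY"]

prompt = "How many records are there in the bank table?"  # placeholder prompt

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",   # model
    temperature=0.2,         # lower temperature -> more deterministic output
    max_tokens=200,          # cap on the length of the completion
    messages=[{"role": "user", "content": prompt}],  # prompt wrapped as a user message
)
print(response.choices[0].message.content)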
Formula: instruction, no examples.
Suppose we want to translate a natural language question to SQL.
prompt = f"Answer the question {natural_question} \
for table {db_name} \
with schema {schema}"
Formula: instruction, one example.
prompt = f"Answer the question {natural_question} \
for table {db_name} \
with schema {schema}\
Question: How many records are there?\
Answer: SELECT COUNT(*) FROM bank"
Formula: instruction, more than one example.
prompt= f"Answer the question {natural_question} \
for table {db_name} \
with schema {schema}\
Question: How many records are there?\
Answer: SELECT COUNT(*) FROM bank\
Question: Find all employees that are unemployed\
Answer: SELECT * FROM bank WHERE job = 'unemployed'"
The role can take one of three values: system, user, or assistant.
The content contains the text of the message from the role.
system role: you can use a system-level instruction to guide your model's behavior throughout the conversation, e.g. "You are a helpful ... with knowledge about ..."
user role: what are typical requests that someone in that role would receive? e.g. "Answer the question ..."
assistant role: this role represents the language model (such as ChatGPT), which generates responses based on the provided user messages.
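Putting the three roles together for our text-to-SQL example, a request could look roughly like this (a sketch; the exact wording of the messages is illustrative):

import openai  # assumes openai.api_key has already been set, as in the earlier sketch

messages = [
    # system: guide the model's behaviour for the whole conversation
    {"role": "system",
     "content": "You are a helpful data analyst with knowledge about SQL."},
    # user: the actual request
    {"role": "user",
     "content": "Answer the question 'How many records are there?' for table bank"},
    # assistant: a previous model answer, which can also serve as a worked example
    {"role": "assistant",
     "content": "SELECT COUNT(*) FROM bank"},
]
response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)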
We can solve this problem with prompting and the ChatCompletion endpoint of the OpenAI API.
Approach: build a Prompter class and add each prompting technique as a method, then evaluate results
import openai

class Prompter:
    def __init__(self, api_key, gpt_model, temperature=0.2):
        if not api_key:
            raise Exception("Please provide the OpenAI API key")
        self.api_key = api_key
        self.gpt_model = gpt_model
        self.temperature = temperature
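Only the constructor is shown above. As an illustration, the roles-based method could look roughly like this (a hypothetical sketch; the actual prompts used for the results below may differ):

class Prompter:
    ...

    def natural_language_with_roles(self, db_name, column_names, question):
        # Hypothetical sketch: a system instruction plus the user's question
        messages = [
            {"role": "system",
             "content": f"You translate natural language questions into SQL "
                        f"for the table {db_name} with schema {column_names}. "
                        f"Return only the SQL query."},
            {"role": "user", "content": question},
        ]
        response = openai.ChatCompletion.create(
            api_key=self.api_key,
            model=self.gpt_model,
            temperature=self.temperature,
            messages=messages,
        )
        return response.choices[0].message.content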
Let's suppose I have a DuckDB in-memory instance with a table called bank that looks as follows.
%sqlcmd explore --table bank
Let's take the different prompting techniques for a ride.
We will ask the GPT-3.5 Turbo model, via the OpenAI API, to translate a natural language question into SQL.
pm = Prompter(open_ai_key, "gpt-3.5-turbo")
Zero-shot results.
pm.natural_language_zero_shot("bank",
column_names,
"How many unique jobs are there?")
'To determine the number of unique jobs in the table "bank", we need to look at the distinct values in the "job" column.'
pm.natural_language_zero_shot("bank",
column_names,
"What is the total balance for \
employees by education?")
'I\'m sorry, but I cannot provide the answer to that question as it requires access to specific data from the "bank" table.'
Few-shot results.
pm.natural_language_few_shot("bank",
column_names,
"How many unique jobs are there?")
'Question: How many unique jobs are there?\nAnswer: SELECT COUNT(DISTINCT job) FROM bank'
pm.natural_language_few_shot("bank",
column_names,
"What is the total balance for \
employees by education?")
'Answer: The total balance for employees by education can be found by grouping the data by education and summing the balance column. The query would be:\n\nSELECT education, SUM(balance) AS total_balance\nFROM bank\nGROUP BY education;'
Roles-based results.
pm.natural_language_with_roles("bank",
column_names,
"How many unique jobs are there?")
'SELECT COUNT(DISTINCT job) FROM bank'
pm.natural_language_with_roles("bank",
column_names,
"What is the total balance for\
employees by education?")
'SELECT education, SUM(balance) AS total_balance FROM bank GROUP BY education'
%%sql
SELECT education, SUM(balance) AS total_balance
FROM bank GROUP BY education;
education | total_balance |
---|---|
primary | 957027 |
secondary | 2759854 |
tertiary | 2396822 |
unknown | 318133 |
You need to ensure you install the right modules via pip, along with any modules specified in the model card of the LLM: here, the transformers, PyTorch, and TensorFlow libraries.
We will explore the functionality of a fine-tuned T5 model via the id mrm8488/t5-base-finetuned-wikiSQL.
Remember that T5-like models are encoder-decoder models and are good at translating between languages.
This model was fine-tuned on the wikiSQL dataset. Note that this model was not instruction-tuned.
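Before we can call the model, we load the tokenizer and the checkpoint from the Hugging Face Hub; a minimal sketch using the transformers Auto classes:

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Download the fine-tuned checkpoint and its tokenizer from the Hub
model_id = "mrm8488/t5-base-finetuned-wikiSQL"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)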
def get_sql(query):
    input_text = "translate English to SQL: %s </s>" % query
    features = tokenizer([input_text],
                         return_tensors='pt')
    output = model.generate(input_ids=features['input_ids'],
                            attention_mask=features['attention_mask'],
                            max_new_tokens=200)
    return tokenizer.decode(output[0])
# Translate
natural_question = "How many entries are there?"
db_name = "banks"
schema = column_names
prompt = f"{natural_question} \
for table {db_name} \
with schema {schema}"
get_sql(prompt)
"<pad> SELECT COUNT Table FROM table WHERE Schema = ['age', 'job','marital', 'education', 'default', 'balance', 'housing', 'loan', 'contact', 'day','month', 'duration', 'campaign', 'pdays', 'previous', 'poutcome', 'y</s>"
Transformer type | Architecture | Model-like | Focus | Example |
---|---|---|---|---|
Auto-regressive | Decoder-only | GPT-like | Generative tasks | Chat bot |
Auto-encoding | Encoder-only | BERT-like | Understanding of the input | Question-answering |
Sequence-to-Sequence | Encoder-decoder | BART/T5-like | Generative tasks that require an input | Language translation |
We will now turn our attention to two open-source frameworks you can use to augment the functionality of prompting through agents: LangChain and Haystack.
The frameworks introduced here can both be installed via pip and imported as modules into your Python script.
The role of an agent is to empower LLMs to decide which actions to take, thereby granting them a certain degree of autonomy. In simple terms, agents are a fusion of LLM chains (sequences of LLM calls) and tools.
LangChain is a framework for developing applications powered by language models. It enables applications that are:
Data-aware: connect a language model to other sources of data
Agentic: allow a language model to interact with its environment
With LangChain, we think in terms of components and off-the-shelf chains.
You can build your custom functions in Python and expose them with the @tool decorator.
After initializing an agent with the GPT model you want, you can ask it to perform tasks with natural language commands.
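As a minimal sketch (assuming the classic LangChain agent API and that OPENAI_API_KEY is set; the tool itself is a toy example), a custom tool plus agent could look like this:

from langchain.agents import AgentType, initialize_agent, tool
from langchain.chat_models import ChatOpenAI

@tool
def count_records(table: str) -> str:
    """Return a SQL query that counts the rows in the given table."""
    return f"SELECT COUNT(*) FROM {table}"

# Initialize the chat model and an agent that can call our tool
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
agent = initialize_agent(tools=[count_records],
                         llm=llm,
                         agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                         verbose=True)

agent.run("Write a query that counts the records in the bank table")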
Pros
Cons
Haystack is an open-source framework for building search systems that work intelligently over large document collections.
Deepset is an open-source startup that empowers developers to build flexible, semantic search systems to query all types of data using the Haystack framework.
Functionality:
Nodes: each Node achieves one thing
Pipelines: this is the standard Haystack structure that connects to your data and performs the NLP tasks you define on it (see the sketch after this list).
Document stores: designed to make document retrieval and processing easier; they can be connected to nodes via pipelines.
Tools: you can think of a Tool as an expert that is able to do something really well.
Agent: a component powered by an LLM, such as GPT-3. It can use tools and decide on the next best course of action to arrive at the answer to a query.
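For instance, a tiny retrieval pipeline wiring a document store to a retriever node could look roughly like this (a sketch assuming Haystack 1.x; the document content is made up for illustration):

from haystack import Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import BM25Retriever

# Document store: holds the documents we want to search over
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents([{"content": "The bank table has 4521 records."}])

# Node: a retriever that finds the most relevant documents for a query
retriever = BM25Retriever(document_store=document_store)

# Pipeline: connects the query to the retriever node
pipeline = Pipeline()
pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
result = pipeline.run(query="How many records does the bank table have?")
print(result["documents"][0].content)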
We are going to use JupySQL to perform the queries.
JupySQL was developed on top of ipython-sql; its purpose is to connect to DBs of various flavours and execute queries within Jupyter notebooks with the %sql and %%sql magics.
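For example (assuming jupysql is installed, e.g. via pip install jupysql, and the DuckDB file exists), you load the extension, connect, and query:

%load_ext sql
%sql duckdb:///bank.duck.db
%sql SELECT COUNT(*) FROM bank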
Let's define JupySQLQuery as a node. We can do this by creating it as a subclass of Haystack's haystack.nodes.base.BaseComponent class.
Initialize JupySQL: this is a tool that can execute queries from Jupyter via the %sql and %%sql magics.
from haystack.nodes.base import BaseComponent

class JupySQLQuery(BaseComponent):
    outgoing_edges = 1

    def __init__(self):
        # Load the SQL magic and connect to the DuckDB database
        %reload_ext sql
        %sql duckdb:///bank.duck.db
Add a method to execute one query via the %sql magic.
from haystack.nodes.base import BaseComponent

class JupySQLQuery(BaseComponent):
    ...
    def run(self, query: str):
        result = %sql {{query}}
        output = {
            "results": f"{result}",
            "query": query,
        }
        return output
Add a method to execute a list of queries via the %sql magic.
from haystack.nodes.base import BaseComponent

class JupySQLQuery(BaseComponent):
    ...
    def run_batch(self, queries: list):
        results = []
        for query in queries:
            result = %sql {query}
            output = {
                "results": f"{result}",
                "query": query,
            }
            results.append(output)
        return results
import os

from haystack.agents import Agent, Tool
from haystack.nodes import PromptNode
from jupysqlagent import sql_agent_prompt
# Initialize node
jupy_sql_query = JupySQLQuery()

# Define a tool with our new node
jupy_sql_query_tool = Tool(name="JupySQL_Query",
                           pipeline_or_node=jupy_sql_query,
                           description="This tool is useful for consuming "
                                       "SQL queries and responds with the result")
# Get the API key
openai_api_key = os.environ.get("openai-key")
chosen_model = "gpt-4"

# Define a prompt node that uses the GPT-4 model
prompt_node = PromptNode(model_name_or_path=chosen_model,
                         api_key=openai_api_key,
                         stop_words=["Observation:"],
                         max_length=1000)
# Define the agent
agent = Agent(prompt_node=prompt_node,
              prompt_template=sql_agent_prompt)
agent.add_tool(jupy_sql_query_tool)
result = agent.run("How many records are there")
Agent custom-at-query-time started with {'query': 'How many records are there', 'params': None}
count the total number of records in the table 'bank'. I can do this directly via an SQL query.
Tool: JupySQL_Query
Tool Input: "select count(*) from bank"
Observation:
+--------------+
| count_star() |
+--------------+
|         4521 |
+--------------+
Thought: The query returned the total number of records in the 'bank' table.
Final Answer: There are 4521 records in the table.
result = agent.run("How many unique levels of education are there")
Agent custom-at-query-time started with {'query': 'How many unique levels of education are there', 'params': None}
identify the unique values in the 'education' column of the 'bank' table. I will use the DISTINCT keyword in SQL to do this.
Tool: JupySQL_Query
Tool Input: "select distinct education from bank"
Observation:
+-----------+
| education |
+-----------+
| primary   |
| secondary |
| tertiary  |
| unknown   |
+-----------+
Thought: There are four unique levels of education from the bank table.
Final Answer: There are four unique levels of education: primary, secondary, tertiary, and unknown.
Pros
Cons