pip install --upgrade pymilvus
pip install git+https://github.com/zc277584121/graphrag.git
import nest_asyncio
nest_asyncio.apply()
import os
import urllib.request
index_root = os.path.join(os.getcwd(), 'graphrag_index')
os.makedirs(os.path.join(index_root, 'input'), exist_ok=True)
url = "https://www.gutenberg.org/cache/epub/7785/pg7785.txt"
file_path = os.path.join(index_root, 'input', 'davinci.txt')
urllib.request.urlretrieve(url, file_path)
with open(file_path, 'r+', encoding='utf-8') as file:
# We use the first 934 lines of the text file, because the later lines are not relevant for this example.
# If you want to save api key cost, you can truncate the text file to a smaller size.
lines = file.readlines()
file.seek(0)
file.writelines(lines[:934]) # Decrease this number if you want to save api key cost.
file.truncate()
python -m graphrag.index --init --root ./graphrag_index
python -m graphrag.index --root ./graphrag_index
import os
import pandas as pd
import tiktoken
from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
# read_indexer_covariates,
read_indexer_entities,
read_indexer_relationships,
read_indexer_reports,
read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import (
store_entity_semantic_embeddings,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores import MilvusVectorStore
output_dir = os.path.join(index_root, "output")
subdirs = [os.path.join(output_dir, d) for d in os.listdir(output_dir)]
latest_subdir = max(subdirs, key=os.path.getmtime) # Get latest output directory
INPUT_DIR = os.path.join(latest_subdir, "artifacts")
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
RELATIONSHIP_TABLE = "create_final_relationships"
COVARIATE_TABLE = "create_final_covariates"
TEXT_UNIT_TABLE = "create_final_text_units"
COMMUNITY_LEVEL = 2
# read nodes table to get community and degree data
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)
description_embedding_store = MilvusVectorStore(
collection_name="entity_description_embeddings",
)
# description_embedding_store.connect(uri="http://localhost:19530") # For Milvus docker service
description_embedding_store.connect(uri="./milvus.db") # For Milvus Lite
entity_description_embeddings = store_entity_semantic_embeddings(
entities=entities, vectorstore=description_embedding_store
)
print(f"Entity count: {len(entity_df)}")
entity_df.head()
relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
relationships = read_indexer_relationships(relationship_df)
print(f"Relationship count: {len(relationship_df)}")
relationship_df.head()
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)
print(f"Report records: {len(report_df)}")
report_df.head()
text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")
text_units = read_indexer_text_units(text_unit_df)
print(f"Text unit records: {len(text_unit_df)}")
text_unit_df.head()
api_key = os.environ["OPENAI_API_KEY"] # Your OpenAI API key
llm_model = "gpt-4o" # Or gpt-4-turbo-preview
embedding_model = "text-embedding-3-small"
llm = ChatOpenAI(
api_key=api_key,
model=llm_model,
api_type=OpenaiApiType.OpenAI,
max_retries=20,
)
token_encoder = tiktoken.get_encoding("cl100k_base")
text_embedder = OpenAIEmbedding(
api_key=api_key,
api_base=None,
api_type=OpenaiApiType.OpenAI,
model=embedding_model,
deployment_name=embedding_model,
max_retries=20,
)
context_builder = LocalSearchMixedContext(
community_reports=reports,
text_units=text_units,
entities=entities,
relationships=relationships,
covariates=None, #covariates,#todo
entity_text_embeddings=description_embedding_store,
embedding_vectorstore_key=EntityVectorStoreKey.ID, # if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE
text_embedder=text_embedder,
token_encoder=token_encoder,
)
local_context_params = {
"text_unit_prop": 0.5,
"community_prop": 0.1,
"conversation_history_max_turns": 5,
"conversation_history_user_turns_only": True,
"top_k_mapped_entities": 10,
"top_k_relationships": 10,
"include_entity_rank": True,
"include_relationship_weight": True,
"include_community_rank": False,
"return_candidate_context": False,
"embedding_vectorstore_key": EntityVectorStoreKey.ID, # set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
"max_tokens": 12_000, # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)
}
llm_params = {
"max_tokens": 2_000, # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000=1500)
"temperature": 0.0,
}
search_engine = LocalSearch(
llm=llm,
context_builder=context_builder,
token_encoder=token_encoder,
llm_params=llm_params,
context_builder_params=local_context_params,
response_type="multiple paragraphs", # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report
)
result = await search_engine.asearch("Tell me about Leonardo Da Vinci")
print(result.response)
# Leonardo da Vinci
Leonardo da Vinci, born in 1452 in the town of Vinci near Florence, is widely celebrated as one of the most versatile geniuses of the Italian Renaissance. His full name was Leonardo di Ser Piero d'Antonio di Ser Piero di Ser Guido da Vinci, and he was the natural and first-born son of Ser Piero, a country notary [Data: Entities (0)]. Leonardo's contributions spanned various fields, including art, science, engineering, and philosophy, earning him the title of the most Universal Genius of Christian times [Data: Entities (8)].
## Early Life and Training
Leonardo's early promise was recognized by his father, who took some of his drawings to Andrea del Verrocchio, a renowned artist and sculptor. Impressed by Leonardo's talent, Verrocchio accepted him into his workshop around 1469-1470. Here, Leonardo met other notable artists, including Botticelli and Lorenzo di Credi [Data: Sources (6, 7)]. By 1472, Leonardo was admitted into the Guild of Florentine Painters, marking the beginning of his professional career [Data: Sources (7)].
## Artistic Masterpieces
Leonardo is perhaps best known for his iconic paintings, such as the "Mona Lisa" and "The Last Supper." The "Mona Lisa," renowned for its subtle expression and detailed background, is housed in the Louvre and remains one of the most famous artworks in the world [Data: Relationships (0, 45)]. "The Last Supper," a fresco depicting the moment Jesus announced that one of his disciples would betray him, is located in the refectory of Santa Maria delle Grazie in Milan [Data: Sources (2)]. Other significant works include "The Virgin of the Rocks" and the "Treatise on Painting," which he began around 1489-1490 [Data: Relationships (7, 12)].
## Scientific and Engineering Contributions
Leonardo's genius extended beyond art to various scientific and engineering endeavors. He made significant observations in anatomy, optics, and hydraulics, and his notebooks are filled with sketches and ideas that anticipated many modern inventions. For instance, he anticipated Copernicus' theory of the earth's movement and Lamarck's classification of animals [Data: Relationships (38, 39)]. His work on the laws of light and shade and his mastery of chiaroscuro had a profound impact on both art and science [Data: Sources (45)].
## Patronage and Professional Relationships
Leonardo's career was significantly influenced by his patrons. Ludovico Sforza, the Duke of Milan, employed Leonardo as a court painter and general artificer, commissioning various works and even gifting him a vineyard in 1499 [Data: Relationships (9, 19, 84)]. In his later years, Leonardo moved to France under the patronage of King Francis I, who provided him with a princely income and held him in high regard [Data: Relationships (114, 37)]. Leonardo spent his final years at the Manor House of Cloux near Amboise, where he was frequently visited by the King and supported by his close friend and assistant, Francesco Melzi [Data: Relationships (28, 122)]'.
## Legacy and Influence
Leonardo da Vinci's influence extended far beyond his lifetime. He founded a School of painting in Milan, and his techniques and teachings were carried forward by his students and followers, such as Giovanni Ambrogio da Predis and Francesco Melzi [Data: Relationships (6, 15, 28)]. His works continue to be celebrated and studied, cementing his legacy as one of the greatest masters of the Renaissance. Leonardo's ability to blend art and science has left an indelible mark on both fields, inspiring countless generations of artists and scientists [Data: Entities (148, 86); Relationships (27, 12)].
In summary, Leonardo da Vinci's unparalleled contributions to art, science, and engineering, combined with his innovative thinking and profound influence on his contemporaries and future generations, make him a towering figure in the history of human achievement. His legacy continues to inspire admiration and study, underscoring the timeless relevance of his genius.
question_generator = LocalQuestionGen(
llm=llm,
context_builder=context_builder,
token_encoder=token_encoder,
llm_params=llm_params,
context_builder_params=local_context_params,
)
question_history = [
"Tell me about Leonardo Da Vinci",
"Leonardo's early works",
]
candidate_questions = await question_generator.agenerate(
question_history=question_history, context_data=None, question_count=5
)
candidate_questions.response
["- What were some of Leonardo da Vinci's early works and where are they housed?",
"- How did Leonardo da Vinci's relationship with Andrea del Verrocchio influence his early works?",
'- What notable projects did Leonardo da Vinci undertake during his time in Milan?',
"- How did Leonardo da Vinci's engineering skills contribute to his projects?",
"- What was the significance of Leonardo da Vinci's relationship with Francis I of France?"]
# import shutil
#
# shutil.rmtree(index_root)