import numpy as np
from dotenv import load_dotenv
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.manifold import TSNE
from langchain import PromptTemplate
from langchain.schema import Document
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Load environment variables from a local .env file before building any
# OpenAI-backed client (presumably this is where the API key lives).
load_dotenv()

# Chat model used for every summarisation call; temperature=0 keeps sampling
# randomness to a minimum.
llm = ChatOpenAI(temperature=0, model='gpt-3.5-turbo-16k')

# Embedding model used to turn each book chunk into a vector.
embeddings = OpenAIEmbeddings()
# Taking out the warnings
from warnings import simplefilter

# Filter out FutureWarnings so library deprecation notices do not clutter the
# output of the cells below.
simplefilter(action='ignore', category=FutureWarning)
# Location of the plain-text book to summarise (relative to the working
# directory the script is run from).
content_path = '../../../../books_content/content_4900.txt'

# Read the whole book into memory as a single string.
# The encoding is stated explicitly so the result does not depend on the
# platform's locale default -- assumes the file is UTF-8; adjust if it is not.
with open(content_path, 'r', encoding='utf-8') as file:
    book_content = file.read()
# Split the book into overlapping chunks of at most 2000 characters, trying
# paragraph breaks first, then line breaks, then tabs.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", "\t"],
    chunk_size=2000,
    chunk_overlap=200,
)

# Wrap each chunk in a Document; `docs` is indexed by chunk position later on.
docs = text_splitter.create_documents([book_content])
print(f"Our book is splited up into {len(docs)} documents")
Our book is splited up into 122 documents
# One embedding vector per chunk, in the same order as `docs`.
vectors = embeddings.embed_documents([x.page_content for x in docs])
# Candidate cluster counts to evaluate.
candidate_ks = range(3, 20)

# Mean silhouette score of each candidate, in the same order as candidate_ks.
silhouette_avg = []
for k in candidate_ks:
    # Seeded so the model selection is reproducible from run to run.
    kmeans = KMeans(n_clusters=k, init="k-means++", n_init=10, random_state=42)
    labels = kmeans.fit_predict(vectors)
    score = silhouette_score(vectors, labels)
    silhouette_avg.append(score)

# Optional diagnostic plot of the scores (kept disabled):
# import matplotlib.pyplot as plt
# plt.plot(np.arange(3,20),silhouette_avg,'bx-')
# plt.xlabel('Values of K')
# plt.ylabel('Silhouette score')
# plt.title('Silhouette analysis For Optimal k')
# _ = plt.xticks(np.arange(3,20))

# np.argmax returns a position in silhouette_avg, not a cluster count.
# Position 0 corresponds to the first candidate (k=3), so the position is
# mapped back through candidate_ks instead of adding a hand-written offset.
num_clusters = candidate_ks[int(np.argmax(silhouette_avg))]
print(f"Best K: {num_clusters}")
Best K: 9
# Perform K-means clustering with the selected number of clusters.
# n_init is given explicitly so this fit uses the same number of restarts as
# the model-selection loop rather than whatever the installed scikit-learn
# version defaults to.
kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=42).fit(vectors)
import numpy as np
import plotly.express as px
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import plotly.io as pio
# Select the Plotly renderers used when figures are shown from a notebook.
pio.renderers.default = "plotly_mimetype+notebook_connected"
# Requires `vectors` (the chunk embeddings) and a fitted `kmeans` model to be
# defined earlier in the script.

# Perform t-SNE and reduce to 3 dimensions
tsne = TSNE(n_components=3, random_state=42)
reduced_data_tsne = tsne.fit_transform(np.array(vectors))

# Create an interactive 3D scatter plot, one point per chunk, coloured by the
# cluster label assigned by K-means.
fig = px.scatter_3d(
    x=reduced_data_tsne[:, 0],
    y=reduced_data_tsne[:, 1],
    z=reduced_data_tsne[:, 2],
    color=kmeans.labels_,
    title='Book Embeddings Clustered (3D)',
    width=600,
    height=500,
)

# Show the plot
fig.show()
# Perform t-SNE and reduce to 2 dimensions
tsne = TSNE(n_components=2, random_state=42)
reduced_data_tsne = tsne.fit_transform(np.array(vectors))

# Plot the reduced data, one point per chunk, coloured by K-means label.
plt.scatter(reduced_data_tsne[:, 0], reduced_data_tsne[:, 1], c=kmeans.labels_)
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.title('Book Embeddings Clustered')
plt.show()
# Find the closest embeddings to the centroids

# Convert once up front; `vectors` is a plain list of lists and would
# otherwise be re-converted to an array on every subtraction.
points = np.asarray(vectors)

# For every cluster centre, record the position of the nearest chunk vector
# (Euclidean distance). Distances are measured against every chunk, not only
# the members of that cluster, so two centres could in principle pick the
# same chunk.
closest_indices = []
for center in kmeans.cluster_centers_:
    # Distance from this centre to each chunk vector
    distances = np.linalg.norm(points - center, axis=1)

    # Position of the smallest distance, stored as a plain int so it can be
    # used directly as a list index
    closest_indices.append(int(np.argmin(distances)))

# Sort so the representative chunks are processed in book order.
selected_indices = sorted(closest_indices)
selected_indices  # bare expression: displays the value when run as a notebook cell
[16, 21, 44, 56, 78, 85, 103]
# Prompt applied to each representative chunk on its own ("map" step).
# {text} is replaced with the chunk's content.
map_prompt = """
You will be given a single passage of a book. This section will be enclosed in triple backticks (```)
Your goal is to give a summary of this section so that a reader will have a full understanding of what happened.
Your response should be at least three paragraphs and fully encompass what was said.
```{text}```
FULL SUMMARY:
"""
map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text"])
from langchain.chains.summarize import load_summarize_chain
# Chain that summarises one chunk at a time using the map prompt.
map_chain = load_summarize_chain(llm=llm, chain_type="stuff", prompt=map_prompt_template)

# The representative chunks, looked up by position and kept in book order.
selected_docs = [docs[idx] for idx in selected_indices]
# Make an empty list to hold your summaries
summary_list = []

# Loop through the selected docs, summarising each one in turn
for i, doc in enumerate(selected_docs):
    # Go get a summary of the chunk
    chunk_summary = map_chain.run([doc])

    # Append that summary to your list
    summary_list.append(chunk_summary)

    # Show the first 250 characters so progress can be followed while running
    print(f"Summary #{i} (chunk #{selected_indices[i]}) - Preview: {chunk_summary[:250]} \n")
Summary #0 (chunk #16) - Preview: In this passage, the author discusses the importance of stimulating the brain in order to facilitate learning and memory retention. They explain that the size or number of cerebral neurons does not determine intellectual power, but rather the richnes
Summary #1 (chunk #21) - Preview: In this passage, the importance of two neural structures in the central part of our brain for teachers is discussed. These structures automatically process a large volume of sensory information and filter it. The filtration system typically filters o
Summary #2 (chunk #44) - Preview: This passage discusses the different types of motivation, specifically extrinsic and intrinsic motivation, and how they are used in educational programs. The author explains that most teachers rely solely on extrinsic motivation, which involves using
Summary #3 (chunk #56) - Preview: In this chapter, the author addresses several questions related to stress and learning. They discuss what is considered too little or too much stress, the mechanisms of stress in learning, and the potential benefits of stress in different phases of l
Summary #4 (chunk #78) - Preview: In this passage, the author discusses the challenges and considerations involved in transitioning from a teacher-centered to a student-centered method of teaching. One critical issue is that students may be resistant to changing to a student-centered
Summary #5 (chunk #85) - Preview: This passage discusses the advantages and challenges of online learning programs. One advantage mentioned is the flexibility that online programs offer in terms of when participants can interact with their colleagues. This is particularly beneficial
Summary #6 (chunk #103) - Preview: In this passage, the author discusses the importance of grading and assessment methods that promote critical thinking in students. They argue that if students are only assessed based on a final exam that requires them to repeat information, they will
# Join the per-chunk summaries into one text, one summary per line break.
summaries_text = "\n".join(summary_list)

# Convert it back to a document so it can be passed to a summarize chain.
summaries = Document(page_content=summaries_text)

# Report the size of the combined summaries in tokens, as counted by the LLM
# wrapper.
print(f"Your total summary has {llm.get_num_tokens(summaries.page_content)} tokens")
Your total summary has 1719 tokens
# Prompt applied once to the joined chunk summaries ("reduce" step).
# {text} is replaced with the combined summaries.
combine_prompt = """
You will be given a series of summaries from a book. The summaries will be enclosed in triple backticks (```)
Your goal is to give a verbose summary of what happened in the story.
The reader should be able to grasp what happened in the book.
```{text}```
VERBOSE SUMMARY:
"""
combine_prompt_template = PromptTemplate(template=combine_prompt, input_variables=["text"])
# Chain that turns the combined chunk summaries into the final book summary.
reduce_chain = load_summarize_chain(llm=llm, chain_type="stuff", prompt=combine_prompt_template)

# Run the reduce step on the single combined-summaries document and print the
# resulting text.
output = reduce_chain.run([summaries])
print(output)
In this book, the author discusses various aspects of effective teaching and learning. They emphasize the importance of stimulating the brain through different stimuli and presenting information in a captivating way. The author also explores the neural structures involved in attention processing and how teachers can engage students' attention using the bottom-up and top-down methods. Additionally, the book delves into the different types of motivation, extrinsic and intrinsic, and how they impact learning outcomes. The author argues for a balance between the two and suggests strategies to enhance intrinsic motivation. The book also addresses the role of stress and social climate in learning, highlighting their potential benefits and drawbacks. Furthermore, the author discusses the challenges and considerations involved in transitioning from a teacher-centered to a student-centered approach. They emphasize the importance of explaining the new method to students and promoting interaction and reflection among students. The book also explores the advantages and challenges of online learning programs, emphasizing the need for effective design and implementation. Lastly, the author discusses the importance of grading and assessment methods that promote critical thinking in students, advocating for a shift away from memorization-based exams. Overall, the book provides insights and strategies for creating an optimal learning environment and promoting effective learning outcomes.