Skip to content

Commit b1af7d9

Browse files
committed
test: full demo data
1 parent 3ad07cd commit b1af7d9

File tree

1 file changed

+9
-4
lines changed

1 file changed

+9
-4
lines changed

packages/docs/generate_demo_data.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import shutil
99

1010
import click
11+
import numpy as np
1112
import pandas as pd
1213
from datasets import load_dataset
1314
from sentence_transformers import SentenceTransformer
@@ -19,18 +20,22 @@ def add_embedding_projection(df: pd.DataFrame, text: str):
1920
texts = list(df[text])
2021

2122
transformer = SentenceTransformer("all-MiniLM-L6-v2")
22-
hidden_vectors = transformer.encode(texts)
23+
hidden_vectors = transformer.encode(texts, show_progress_bar=True)
24+
25+
random_state = np.random.RandomState(42)
2326

2427
knn = nearest_neighbors(
2528
hidden_vectors,
2629
n_neighbors=15,
2730
metric="cosine",
2831
metric_kwds=None,
2932
angular=False,
30-
random_state=None,
33+
random_state=random_state,
3134
)
3235

33-
proj = UMAP(metric="cosine", precomputed_knn=knn).fit_transform(hidden_vectors)
36+
proj = UMAP(
37+
metric="cosine", precomputed_knn=knn, random_state=random_state
38+
).fit_transform(hidden_vectors)
3439

3540
df["projection_x"] = proj[:, 0] # type: ignore
3641
df["projection_y"] = proj[:, 1] # type: ignore
@@ -55,7 +60,7 @@ def main(output: str):
5560
]
5661

5762
ds = load_dataset(name, split="train")
58-
df = ds.to_pandas().sample(100)[columns] # type: ignore
63+
df = ds.to_pandas()[columns] # type: ignore
5964

6065
add_embedding_projection(df, text="description")
6166

0 commit comments

Comments (0)