1 file changed: +9 -4 lines changed
Original file line number | Diff line number | Diff line change
8 8 import shutil
99
1010import click
11+ import numpy as np
1112import pandas as pd
1213from datasets import load_dataset
1314from sentence_transformers import SentenceTransformer
@@ -19,18 +20,22 @@ def add_embedding_projection(df: pd.DataFrame, text: str):
1920 texts = list (df [text ])
2021
2122 transformer = SentenceTransformer ("all-MiniLM-L6-v2" )
22- hidden_vectors = transformer .encode (texts )
23+ hidden_vectors = transformer .encode (texts , show_progress_bar = True )
24+
25+ random_state = np .random .RandomState (42 )
2326
2427 knn = nearest_neighbors (
2528 hidden_vectors ,
2629 n_neighbors = 15 ,
2730 metric = "cosine" ,
2831 metric_kwds = None ,
2932 angular = False ,
30- random_state = None ,
33+ random_state = random_state ,
3134 )
3235
33- proj = UMAP (metric = "cosine" , precomputed_knn = knn ).fit_transform (hidden_vectors )
36+ proj = UMAP (
37+ metric = "cosine" , precomputed_knn = knn , random_state = random_state
38+ ).fit_transform (hidden_vectors )
3439
3540 df ["projection_x" ] = proj [:, 0 ] # type: ignore
3641 df ["projection_y" ] = proj [:, 1 ] # type: ignore
@@ -55,7 +60,7 @@ def main(output: str):
5560 ]
5661
5762 ds = load_dataset (name , split = "train" )
58- df = ds .to_pandas (). sample ( 100 ) [columns ] # type: ignore
63+ df = ds .to_pandas ()[columns ] # type: ignore
5964
6065 add_embedding_projection (df , text = "description" )
6166
You can’t perform that action at this time.
0 commit comments