Skip to content

Commit 201caec

Browse files
Updates
1 parent c068855 commit 201caec

File tree

9 files changed

+101
-163
lines changed

9 files changed

+101
-163
lines changed

src/JuliaHealthLLM.jl

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,19 @@
11
"""
    JuliaHealthLLM

Top-level package module. It loads the package dependencies, includes the source files
that define the `Utils`, `Pgvector`, `Database`, `Embedding` and `Query` submodules, and
re-exports their public functions.
"""
module JuliaHealthLLM

using PromptingTools
using PromptingTools.Experimental.RAGTools
using LinearAlgebra, SparseArrays
using JSON3, Serialization
using Statistics
using LibPQ

include("utils.jl")
include("pgvector.jl")
include("database.jl")
include("embedding.jl")
include("query.jl")

# Each included file wraps its functions in its own submodule, so the functions have to
# be brought into this namespace first; exporting a name that is only bound inside a
# submodule would leave the exported name undefined for users of the package.
using .Utils: collect_files_with_extensions, write_combined_file
using .Database: store_embeddings_pgvector
using .Embedding: build_index_rag
using .Query: generate_funsql_query

export collect_files_with_extensions, write_combined_file, generate_funsql_query,
    build_index_rag, store_embeddings_pgvector

end

src/chunk_docs.jl

Lines changed: 0 additions & 115 deletions
This file was deleted.

src/database.jl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
module Database

# A submodule does not inherit the `using` statements of its parent module, so the
# names referenced below are brought into scope here explicitly.
using LibPQ
using ..Pgvector

"""
    store_embeddings_pgvector(conn, embeddings, chunks, embedding_dimension)

Create the `embeddings` table if it does not exist yet and insert one row per column of
`embeddings`, pairing column `i` with `chunks[i]`.

# Arguments
- `conn::LibPQ.Connection`: connection on which every statement is executed.
- `embeddings::AbstractMatrix`: one embedding per column; converted to `Matrix{Float64}`.
- `chunks::AbstractVector`: text belonging to each column; each element is converted
  with `String`.
- `embedding_dimension::Int`: size used for the `VECTOR(n)` column of the table.

# Throws
- `DimensionMismatch` if the number of columns differs from `length(chunks)` or the
  number of rows differs from `embedding_dimension`.

Prints a confirmation line when all rows have been inserted and returns `nothing`.
"""
function store_embeddings_pgvector(
    conn::LibPQ.Connection,
    embeddings::AbstractMatrix,
    chunks::AbstractVector,
    embedding_dimension::Int,
)
    # Fail before touching the database instead of part-way through the inserts.
    size(embeddings, 2) == length(chunks) || throw(DimensionMismatch(
        "got $(size(embeddings, 2)) embedding columns but $(length(chunks)) chunks",
    ))
    size(embeddings, 1) == embedding_dimension || throw(DimensionMismatch(
        "embeddings have $(size(embeddings, 1)) rows but embedding_dimension is " *
        "$embedding_dimension",
    ))

    # `embedding_dimension` is an `Int`, so interpolating it into the DDL is safe.
    LibPQ.execute(conn, """
        CREATE TABLE IF NOT EXISTS embeddings (
            id SERIAL PRIMARY KEY,
            chunk TEXT NOT NULL,
            embedding VECTOR($embedding_dimension)
        )
    """)

    embeddings = convert(Matrix{Float64}, embeddings)
    chunks = String.(chunks)

    # Iterate over the columns (second axis); each column is one embedding vector.
    for i in axes(embeddings, 2)
        chunk = chunks[i]
        embedding = Pgvector.convert(view(embeddings, :, i))
        LibPQ.execute(conn, """
            INSERT INTO embeddings (chunk, embedding)
            VALUES (\$1, \$2)
        """, (chunk, embedding))
    end

    println("Embeddings successfully stored in the database.")
    return nothing
end

end

src/dummy_src_file.jl

Lines changed: 0 additions & 11 deletions
This file was deleted.

src/embedding.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
module Embedding

# A submodule does not inherit the parent's `using` statements; import the one function
# this wrapper forwards to.
using PromptingTools.Experimental.RAGTools: build_index

"""
    build_index_rag(cfg, files; embedder_kwargs=NamedTuple())

Forward `cfg`, `files` and `embedder_kwargs` to
`PromptingTools.Experimental.RAGTools.build_index` and return its result.

# Keywords
- `embedder_kwargs`: passed through unchanged. The default is an empty `NamedTuple`
  rather than an empty `Tuple`, because keyword bundles are name/value collections.
  NOTE(review): `build_index` is expected to require a `NamedTuple` here — confirm
  against the installed PromptingTools version.
"""
function build_index_rag(cfg, files; embedder_kwargs=NamedTuple())
    return build_index(cfg, files; embedder_kwargs=embedder_kwargs)
end

end

src/pgvector.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
module Pgvector

"""
    convert(v::AbstractVector{<:Real}) -> String

Render `v` as a bracketed list whose elements are separated by a single comma with no
spaces, e.g. `[1.0,2.0,3.0]`. An empty vector yields `[]`.

This function is defined locally in this module, so call it as `Pgvector.convert`.
"""
function convert(v::AbstractVector{<:Real})
    return sprint() do io
        print(io, "[")
        join(io, v, ",")
        print(io, "]")
    end
end

end

src/query.jl

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
module Query

# A submodule does not inherit the parent's `using` statements, so everything referenced
# below (`PromptingTools`, the `PT` alias and `airag`) is brought into scope here.
using PromptingTools
using PromptingTools.Experimental.RAGTools: airag

const PT = PromptingTools

"""
    generate_funsql_query(index, model_embedding, model_name, prompt_template, question)

Run a retrieval-augmented generation call against `index` and return the result of
`airag`.

Steps performed:
1. Set `PromptingTools.MODEL_CHAT` to `model_name` and `PromptingTools.MODEL_EMBEDDING`
   to `model_embedding` (this mutates PromptingTools' global settings).
2. Register both model names with `PT.OllamaSchema()`.
3. Replace every `{input_query}` placeholder in `prompt_template` with `question`.
4. Call `airag` with the resulting prompt, using `model_embedding` for retrieval and
   `model_name` for generation.

# Arguments
- `index`: document index passed as the first argument to `airag`.
- `model_embedding::String`: name of the embedding model.
- `model_name::String`: name of the chat model.
- `prompt_template::String`: template containing the `{input_query}` placeholder.
- `question::String`: text substituted for the placeholder.
"""
function generate_funsql_query(
    index,
    model_embedding::String,
    model_name::String,
    prompt_template::String,
    question::String,
)
    PromptingTools.MODEL_CHAT = model_name
    PromptingTools.MODEL_EMBEDDING = model_embedding

    # Use the local arguments directly: the bare names `MODEL_CHAT` / `MODEL_EMBEDDING`
    # are not bound in this module, and they hold the same values that were just
    # assigned above.
    PT.register_model!(name=model_name, schema=PT.OllamaSchema())
    PT.register_model!(name=model_embedding, schema=PT.OllamaSchema())

    prompt = replace(prompt_template, "{input_query}" => question)

    answer = airag(index;
        question=prompt,
        retriever_kwargs=(
            model=model_embedding,
            schema=PT.OllamaSchema(),
            embedder_kwargs=(schema=PT.OllamaSchema(), model=model_embedding),
        ),
        generator_kwargs=(model=model_name, schema=PT.OllamaSchema()),
    )

    return answer
end

end

src/rag.jl

Lines changed: 0 additions & 19 deletions
This file was deleted.

src/utils.jl

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
module Utils

"""
    collect_files_with_extensions(directory, extensions) -> Vector{String}

Walk `directory` recursively and return the path of every file whose name ends with one
of `extensions`.

The returned paths are relative to `directory` (files directly inside it are returned as
`joinpath(".", name)`), so they must be resolved against `directory` before being
opened from another working directory.

# Arguments
- `directory`: root of the tree to walk.
- `extensions`: suffixes to match with `endswith`, e.g. `[".jl", ".md"]`.
"""
function collect_files_with_extensions(
    directory::AbstractString, extensions::AbstractVector{<:AbstractString}
)
    files = String[]
    for (root, _, file_names) in walkdir(directory)
        # Loop-invariant for all files of the same folder.
        relative_root = relpath(root, directory)
        for file_name in file_names
            if any(ext -> endswith(file_name, ext), extensions)
                push!(files, joinpath(relative_root, file_name))
            end
        end
    end
    return files
end

"""
    write_combined_file(files, output_file; base_directory="")

Concatenate `files` into `output_file`. For each entry a `# File: <entry>` header line is
written, followed by the file's lines and a blank separator.

# Arguments
- `files`: paths to read, in the order they should appear in the output.
- `output_file`: path of the file to create or overwrite.

# Keywords
- `base_directory`: directory that the entries of `files` are resolved against when they
  are opened. Pass the directory given to `collect_files_with_extensions` to combine its
  (directory-relative) results from any working directory. The default `""` opens the
  entries exactly as given.
"""
function write_combined_file(
    files::AbstractVector{<:AbstractString},
    output_file::AbstractString;
    base_directory::AbstractString="",
)
    open(output_file, "w") do io
        for file in files
            # The header keeps the path as supplied, not the resolved one.
            println(io, "# File: $file")
            open(joinpath(base_directory, file), "r") do f
                for line in eachline(f)
                    println(io, line)
                end
            end
            println(io, "\n")
        end
    end
    return nothing
end

end

0 commit comments

Comments
 (0)