-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathflatdeck.sh
More file actions
84 lines (71 loc) · 2.89 KB
/
flatdeck.sh
File metadata and controls
84 lines (71 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/bash
# FlatDeck - Shell script to process PDF documents through multiple steps
#
# Usage: flatdeck.sh /path/to/document.pdf
#
# The Python stages are invoked by bare file name, so the script has to be run
# from the directory that contains the flatdeck_*.py files.

# Show usage if no arguments provided
if [[ $# -eq 0 ]]; then
  echo "Usage: $0 /path/to/document.pdf" >&2
  exit 1
fi

# Fail fast on a missing or non-regular file instead of letting the first
# Python stage discover the problem.
if [[ ! -f "$1" ]]; then
  echo "ERROR: PDF file not found: $1" >&2
  exit 1
fi

# Get absolute path to the PDF; stop if the path cannot be resolved so that
# later stages never receive an empty argument.
PDF_PATH=$(realpath "$1") || {
  echo "ERROR: Could not resolve path: $1" >&2
  exit 1
}
# File name without directory and without a trailing ".pdf" suffix.
PDF_NAME=$(basename "$PDF_PATH" .pdf)

echo "==========================================="
echo "FlatDeck PDF Processing Pipeline"
echo "==========================================="
echo "PDF: $PDF_PATH"
echo "==========================================="
# Step 1: PDF Ingestion
# Mandatory stage: a non-zero exit from the ingestor aborts the whole script
# with status 1.
printf '[%s] Starting PDF ingestion...\n' "$(date '+%Y-%m-%d %H:%M:%S')"
if ! python flatdeck_pdf_ingestor.py "$PDF_PATH"; then
  printf '%s\n' "ERROR: PDF ingestion failed!"
  exit 1
fi
printf '[%s] PDF ingestion completed.\n' "$(date '+%Y-%m-%d %H:%M:%S')"

# Allow some time for GPU resources to be released
printf '[%s] Waiting for GPU resources to be released...\n' "$(date '+%Y-%m-%d %H:%M:%S')"
sleep 2
# Step 2: Image Description
# Optional stage: a failure is reported as a warning and the script carries on.
echo "-------------------------------------------"
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting image description..."
if python flatdeck_image_descriptor.py; then
  # Only claim completion when the stage actually succeeded.
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] Image description completed."
else
  echo "WARNING: Image description failed. Continuing with processing..."
fi

# Allow some time for GPU resources to be released
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Waiting for GPU resources to be released..."
sleep 2
# Step 3: OCR Text Processing
# Optional stage: a failure is reported as a warning and the script carries on.
echo "-------------------------------------------"
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting OCR text processing..."
if python flatdeck_chat_processor.py --task ocr_fix; then
  # Only claim completion when the stage actually succeeded.
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] OCR text processing completed."
else
  echo "WARNING: OCR text processing failed. Continuing with markdown generation..."
fi

# Allow some time for GPU resources to be released
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Waiting for GPU resources to be released..."
sleep 2
# Step 4: Page Summary Processing
# Optional stage: a failure is reported as a warning and the script carries on.
echo "-------------------------------------------"
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting page summary creation..."
if python flatdeck_chat_processor.py --task summary; then
  # Completion line names this stage (it previously repeated the OCR stage's
  # message) and is printed only when the stage actually succeeded.
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] Page summary creation completed."
else
  echo "WARNING: Page summary processing failed. Continuing with markdown generation..."
fi
# Step 5: Markdown Generation
# Mandatory stage: a non-zero exit from the markdown generator aborts the
# script with status 1.
printf '%s\n' "-------------------------------------------"
printf '[%s] Generating markdown output...\n' "$(date '+%Y-%m-%d %H:%M:%S')"
if ! python flatdeck_markdown.py "$PDF_PATH" --output_type summary; then
  printf '%s\n' "ERROR: Markdown generation failed!"
  exit 1
fi
printf '[%s] Markdown generation completed.\n' "$(date '+%Y-%m-%d %H:%M:%S')"
# Show output location
# OUTPUT_DIR is not assigned anywhere in this script, so it is honoured only
# when inherited from the environment. Without it the pattern is shown with no
# directory prefix rather than as a misleading root-anchored "/<name>_*.md".
# NOTE(review): confirm where flatdeck_markdown.py actually writes its output.
if [[ -n "${OUTPUT_DIR:-}" ]]; then
  MARKDOWN_FILE="${OUTPUT_DIR%/}/${PDF_NAME}_*.md"
else
  MARKDOWN_FILE="${PDF_NAME}_*.md"
fi

echo "==========================================="
echo "Processing completed successfully!"
# The glob is displayed literally as a hint; it is intentionally not expanded.
echo "Output markdown file(s): $MARKDOWN_FILE"
echo "==========================================="