logseq-python-library/examples/video_processing_pipeline_example.py at main · thinmanj/logseq-python-library · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env python3
"""
Example: Video Processing Pipeline for Logseq

This example demonstrates how to use the video processing pipeline to:
1. Scan a Logseq graph for video URLs
2. Extract video metadata and subtitles
3. Enhance blocks with {{video}} syntax
4. Generate tags from video content
5. Create tagged pages with source information

The pipeline can process YouTube, Vimeo, TikTok, Twitch, and Dailymotion videos.
"""

import sys
import logging
from pathlib import Path

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from logseq_py.pipeline.video_processor import VideoProcessingPipeline


def main(graph_path="/Volumes/Projects/logseq/Test", config=None):
    """Run the full video processing pipeline and print a summary report.

    Args:
        graph_path: Path of the Logseq graph to process. Defaults to the
            sample location used by this example.
        config: Optional mapping of settings layered on top of the example
            defaults defined below (for instance ``{'dry_run': False}``).
            Keys not supplied keep their default value.

    Returns:
        None. Everything is reported on stdout. Any ``Exception`` raised
        while constructing or running the pipeline is caught, printed, and
        followed by a traceback instead of being propagated.
    """
    print("🎬 Video Processing Pipeline Example\n")

    # Example defaults; caller-supplied settings take precedence.
    settings = {
        'dry_run': True,  # Set to False to actually modify files
        'youtube_api_key': None,  # Add your YouTube API key for enhanced features
        'tag_prefix': 'video-topic',
        'min_subtitle_length': 100,
        'max_tags_per_video': 5,
        'backup_enabled': True,
    }
    if config:
        settings.update(config)

    print("Configuration:")
    for key, value in settings.items():
        print(f"  {key}: {value}")
    print()

    # Set up logging to see what's happening
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    try:
        # Initialize the pipeline
        pipeline = VideoProcessingPipeline(graph_path, settings)

        print("🚀 Running video processing pipeline...")
        print("   This will:")
        for step in (
            "   1. 🔍 Scan all pages for video URLs",
            "   2. 📹 Extract video metadata",
            "   3. 📝 Extract subtitles (YouTube only)",
            "   4. 🏷️  Analyze content for tags",
            "   5. ✨ Enhance blocks with {{video}} syntax",
            "   6. 📄 Create tagged pages",
        ):
            print(step)
        print()

        # Run the pipeline
        result = pipeline.run()

        # Display results
        if result['success']:
            print("✅ Pipeline completed successfully!")
            print("\n📊 Results:")
            stats = result['stats']
            # (label, stats key) pairs, printed in this order.
            report_rows = (
                ("Blocks processed", 'blocks_processed'),
                ("Videos found", 'videos_found'),
                ("Videos enhanced", 'videos_enhanced'),
                ("Subtitles extracted", 'subtitles_extracted'),
                ("Tags created", 'tags_created'),
                ("Pages created", 'pages_created'),
            )
            for label, stat_key in report_rows:
                print(f"   {label}: {stats[stat_key]}")

            # Only mention errors when there were some.
            if stats['errors'] > 0:
                print(f"   ⚠️  Errors: {stats['errors']}")
        else:
            print(f"❌ Pipeline failed: {result.get('error')}")

    except Exception as e:
        # Top-level boundary of the example: report the failure and show the
        # traceback rather than crashing the script.
        print(f"💥 Error running pipeline: {e}")
        import traceback
        traceback.print_exc()


def demonstrate_individual_features():
    """Show each building block of the pipeline on its own.

    Prints, in order: the video URLs found in a sample text, the metadata
    looked up for the first two of those URLs, the tags extracted from a
    sample transcript, and installation notes for subtitle extraction.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("🔧 INDIVIDUAL FEATURE DEMONSTRATIONS")
    print(banner)

    # Imports are local to this demo and happen after the banner is printed.
    from logseq_py.pipeline.subtitle_extractor import YouTubeSubtitleExtractor, VideoContentAnalyzer
    from logseq_py.utils import LogseqUtils

    # 1. Video URL extraction
    print("\n1. 📹 Video URL Extraction")
    demo_note = """
    Here are some interesting videos I found:
    - https://www.youtube.com/watch?v=dQw4w9WgXcQ
    - https://vimeo.com/148751763
    - Some other content here
    - https://www.twitch.tv/videos/123456
    """

    found_urls = LogseqUtils.extract_video_urls(demo_note)
    print(f"Found {len(found_urls)} video URLs:")
    for found in found_urls:
        print(f"  • {found}")

    # 2. Video metadata extraction
    print("\n2. 📊 Video Metadata Extraction")
    # (label, metadata key, fallback) triples shown for every resolved video.
    metadata_fields = (
        ("Title", 'title', 'Unknown'),
        ("Author", 'author_name', 'Unknown'),
        ("Platform", 'platform', 'unknown'),
    )
    for candidate in found_urls[:2]:  # Just test first 2
        details = LogseqUtils.get_video_info(candidate)
        if not details:
            print(f"  {candidate} - Could not extract info")
            continue
        print(f"  {candidate}")
        for label, field, fallback in metadata_fields:
            print(f"    {label}: {details.get(field, fallback)}")

    # 3. Content analysis
    print("\n3. 🏷️  Content Analysis")
    tagger = VideoContentAnalyzer(max_tags=3)

    transcript = """
    Welcome to this tutorial on machine learning and artificial intelligence.
    Today we'll be covering neural networks, deep learning algorithms,
    and how to implement them in Python. We'll discuss data science
    techniques and show you how to build predictive models for
    business applications and research purposes.
    """

    suggested_tags = tagger.extract_tags(transcript, "Machine Learning Tutorial")
    print(f"Extracted tags: {suggested_tags}")

    # 4. Subtitle extraction (requires youtube-transcript-api)
    print("\n4. 📝 Subtitle Extraction")
    # The extractor is only constructed here; no extraction call is made.
    _subtitle_extractor = YouTubeSubtitleExtractor()

    # This would only work with the actual library installed
    for note in (
        "  Note: Subtitle extraction requires 'youtube-transcript-api' package",
        "  Install with: pip install youtube-transcript-api",
        "  Then subtitles can be extracted from YouTube videos",
    ):
        print(note)


if __name__ == "__main__":
    # Interactive entry point: ask which demo(s) to run, then dispatch.
    print("Choose an option:")
    print("1. Run full pipeline example")
    print("2. Demonstrate individual features")
    print("3. Both")

    choice = input("Enter choice (1/2/3): ").strip()

    # Reject anything outside the menu; previously an unknown choice ran
    # nothing and still reported the example as completed.
    if choice not in ('1', '2', '3'):
        sys.exit(f"Invalid choice {choice!r}; expected 1, 2 or 3.")

    if choice in ('1', '3'):
        main()

    if choice in ('2', '3'):
        demonstrate_individual_features()

    print("\n🎉 Example completed!")
    print("\nTo run the pipeline on your actual Logseq graph:")
    print("python scripts/video_processor_cli.py /path/to/your/logseq/graph --dry-run")