Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
var ParagraphGap = 3.0 // gap length, in seconds, at which a gap in the transcript is considered the start of a new paragraph
Functions ¶
This section is empty.
Types ¶
type Client ¶
// Client fetches YouTube transcripts through its FetchRawTranscript and
// FetchTranscript methods; none of its fields are exported.
type Client struct {
// contains filtered or unexported fields
}
Client provides methods to fetch YouTube transcripts.
func NewClient ¶
NewClient creates a new YouTube transcript client. timeoutSeconds is applied per-request.
func (*Client) FetchRawTranscript ¶
func (c *Client) FetchRawTranscript(videoID string, config *TranscriptConfig) (*TranscriptRaw, error)
FetchRawTranscript fetches the raw transcript for a YouTube video. videoID may be a full URL or an 11-character video ID. config may be nil to use defaults (English, first available).
func (*Client) FetchTranscript ¶
func (c *Client) FetchTranscript(videoID string, config *TranscriptConfig) (*TranscriptResult, error)
FetchTranscript fetches and smooshes the transcript for a YouTube video.
type IndexEntry ¶
// IndexEntry pairs a position in the smooshed transcript text with the
// transcript time at that position.
type IndexEntry struct {
// Timestamp is the start time, in seconds, of this segment.
Timestamp float64 `json:"timestamp"`
// Offset is the byte offset into the smooshed text that corresponds to
// Timestamp.
Offset int `json:"offset"`
}
IndexEntry represents a mapping from a smooshed text offset to a timestamp.
type Smooshed ¶
// Smooshed is the plain-text form of a transcript together with an index
// that relates positions in the text to timestamps.
type Smooshed struct {
// Text is the transcript as a single string.
Text string `json:"text"`
// Indexes holds the index entries; each entry's Offset is a byte offset
// into the smooshed text.
Indexes []IndexEntry `json:"indexes"`
}
Smooshed represents the transcript text as a string and includes indexed entries. This data is the whole point of this project: to retrieve a plain-text transcript of a YouTube video.
func ProcessTranscript ¶
func ProcessTranscript(transcript *TranscriptRaw) (*Smooshed, error)
ProcessTranscript converts YouTube's chunky transcript into a smooshed text version with an index.
type TranscriptConfig ¶
// TranscriptConfig holds the options used when fetching a transcript.
type TranscriptConfig struct {
// Lang specifies the language code for the transcript (e.g., "en" for English).
// If empty, the first available language will be used.
Lang string
// TimeoutSeconds specifies the timeout, in seconds, for fetching the transcript.
// NOTE(review): how this relates to the timeout given when the Client is
// created is not visible here — confirm.
TimeoutSeconds int
}
TranscriptConfig holds configuration for fetching transcripts.
type TranscriptEntry ¶
TranscriptEntry represents a single transcript segment.
type TranscriptInfo ¶
// TranscriptInfo carries the descriptive fields of a transcript: the same
// fields as TranscriptRaw, minus its Lines.
type TranscriptInfo struct {
// VideoID identifies the video (presumably the 11-character YouTube ID — confirm).
VideoID string
// VideoTitle is the title of the video.
VideoTitle string
// Language is presumably the human-readable language name — confirm.
Language string
// LanguageCode is presumably a language code such as "en" — confirm.
LanguageCode string
// IsGenerated presumably marks auto-generated transcripts — confirm.
IsGenerated bool
// IsTranslatable presumably marks transcripts that can be translated — confirm.
IsTranslatable bool
}
TranscriptInfo represents the metadata portion of a TranscriptRaw (every field except Lines).
func GetInfo ¶
func GetInfo(transcript *TranscriptRaw) *TranscriptInfo
GetInfo extracts the TranscriptInfo from a TranscriptRaw.
type TranscriptLine ¶
// TranscriptLine is one timed segment of a transcript.
type TranscriptLine struct {
// Text is the text of this segment.
Text string `json:"text"`
// Start is when the segment begins (presumably seconds from the start of
// the video — confirm).
Start float64 `json:"start"`
// Duration is how long the segment lasts (presumably in seconds — confirm).
Duration float64 `json:"duration"`
}
TranscriptLine is a single timed segment of a transcript.
type TranscriptRaw ¶
// TranscriptRaw is the unprocessed transcript of a video: descriptive
// fields plus the timed lines.
type TranscriptRaw struct {
// VideoID identifies the video (presumably the 11-character YouTube ID — confirm).
VideoID string
// VideoTitle is the title of the video.
VideoTitle string
// Language is presumably the human-readable language name — confirm.
Language string
// LanguageCode is presumably a language code such as "en" — confirm.
LanguageCode string
// IsGenerated presumably marks auto-generated transcripts — confirm.
IsGenerated bool
// IsTranslatable presumably marks transcripts that can be translated — confirm.
IsTranslatable bool
// Lines holds the timed segments of the transcript.
Lines []TranscriptLine
}
TranscriptRaw is the raw transcript data for a video.
type TranscriptResult ¶
// TranscriptResult bundles a transcript's metadata with its smooshed text.
type TranscriptResult struct {
// Info is the video and transcript metadata.
Info *TranscriptInfo
// Smooshed is the plain-text transcript and its index.
Smooshed *Smooshed
}
TranscriptResult contains the complete transcript and video metadata.