package backend
import (
        "os"
        "path"
        "go.mlcdf.fr/sc-backup/internal/domain"
)
// https://github.com/uber-go/guide/blob/master/style.md#verify-interface-compliance
// Compile-time check that *fs implements domain.Backend.
var _ domain.Backend = (*fs)(nil)
// fs is a Backend that stores every export as a file inside one directory.
type fs struct {
        // location is the directory the files are written to.
        location  string
        // formatter encodes the data and supplies the file extension.
        formatter domain.Formatter
}
// NewFS returns a filesystem backend that writes under location and encodes
// the data with format.
func NewFS(location string, format domain.Formatter) *fs {
        return &fs{location, format}
}
// Create makes the backend's directory, along with any missing parent, and
// returns the error reported by the filesystem when that fails.
func (f *fs) Create() error {
        return os.MkdirAll(f.location, os.ModePerm)
}
// Location returns the directory this backend writes to.
func (f *fs) Location() string {
        return f.location
}
// Save encodes data with the backend's formatter into a file named after the
// data's slug plus the formatter's extension, inside the backend directory.
// An existing file with the same name is truncated.
//
// It returns the first error met while creating, writing or closing the file.
func (f *fs) Save(data domain.Serializable) (err error) {
        p := path.Join(f.location, data.Slug()+f.formatter.Ext())
        fd, err := os.Create(p)
        if err != nil {
                return err
        }
        // Always release the descriptor. A failing Close can be the only sign
        // that buffered data never reached the disk, so report it unless an
        // earlier error already explains the failure.
        defer func() {
                if closeErr := fd.Close(); err == nil {
                        err = closeErr
                }
        }()
        return f.formatter.Format(data, fd)
}
		
		package backup
import (
        "fmt"
        "log"
        "math"
        "net/http"
        "regexp"
        "strconv"
        "strings"
        "time"
        "github.com/PuerkitoBio/goquery"
        "github.com/pkg/errors"
        "go.mlcdf.fr/sc-backup/internal/domain"
        "go.mlcdf.fr/sc-backup/internal/logging"
        "go.mlcdf.fr/sc-backup/internal/pool"
)
// URL is the base address of the SensCritique website.
const URL = "https://www.senscritique.com"
// Categories lists the collection categories that Collection backs up.
var Categories = []string{"films", "series", "bd", "livres", "albums", "morceaux"}
// Filters lists the collection filters backed up for every category.
var Filters = []string{"done", "wish"}
// parseFunc extracts entries from a parsed HTML page.
type parseFunc func(document *goquery.Document) ([]*domain.Entry, error)
// client is the HTTP client shared by every request. It gives up after 20
// seconds and never follows redirects: a 3xx response is handed back as is,
// which lets validateUser look for a 301.
var client = &http.Client{
        Timeout: time.Second * 20,
        CheckRedirect: func(req *http.Request, via []*http.Request) error {
                return http.ErrUseLastResponse
        },
}
// request performs a GET on url with the shared client and returns the
// response. Redirects are not followed, so 3xx responses are returned to the
// caller untouched.
//
// It fails when the request cannot be performed or when the server answers
// with a 4xx or 5xx status. On success the caller owns the response body.
func request(url string) (*http.Response, error) {
        logging.Debug("GET %s", url)
        res, err := client.Get(url)
        if err != nil {
                return nil, errors.Wrapf(err, "failed to GET %s", url)
        }
        // 400 itself is an error too, hence >= rather than >.
        if res.StatusCode >= 400 {
                // The response is dropped here, so release its connection.
                res.Body.Close()
                return nil, fmt.Errorf("error: http %d for url %s", res.StatusCode, res.Request.URL)
        }
        return res, nil
}
func makeCollectionURL(username string, category string, filter string) string {
        return fmt.Sprintf("%s/%s/collection/%s/%s/all/all/all/all/all/all/all/page-", URL, username, filter, category)
}
// listPageRe matches a "page-" marker together with the rest of its line.
var listPageRe = regexp.MustCompile(`page-(.*)`)

// makeListURL returns the address of page number index of the list at url.
//
// When url already carries a "page-" marker, the marker and whatever follows
// it are replaced. Otherwise "page-<index>" is appended as a new path segment,
// adding a "/" separator only when url does not already end with one.
func makeListURL(url string, index int) string {
        page := "page-" + strconv.Itoa(index)
        if strings.Contains(url, "page-") {
                return listPageRe.ReplaceAllString(url, page)
        }
        if !strings.HasSuffix(url, "/") {
                url += "/"
        }
        return url + page
}
// validateUser checks that the profile page of username can be fetched.
//
// Redirects are not followed by the shared client, so a 301 answer is visible
// here; it is reported as a missing or restricted profile.
func validateUser(username string) error {
        res, err := request(URL + "/" + username)
        if err != nil {
                return errors.Wrap(err, "failed to validate user")
        }
        // Only the status code matters; release the connection when done.
        defer res.Body.Close()
        if res.StatusCode == 301 {
                return fmt.Errorf("username %s does not exist or has a limited profil", username)
        }
        return nil
}
// isList reports whether document is a list page, that is whether it holds
// exactly one ".elme-listTitle" element.
func isList(document *goquery.Document) bool {
        return document.Find(".elme-listTitle").Length() == 1
}
// parseGenre extracts the genres of an item from the HTML of its
// "p.elco-baseline.elco-options" paragraph.
//
// The genres are read from the text following a "</time>" tag when there is
// one, otherwise from the text following the "Sortie : ..." sentence. It
// returns nil genres when neither form is recognised.
func parseGenre(s *goquery.Selection) ([]string, error) {
        rawHTML, err := s.Find("p.elco-baseline.elco-options").Html()
        if err != nil {
                return nil, err
        }

        // tokenize cuts a genre sentence on ", ", " et " and runs of whitespace.
        tokenize := func(text string) []string {
                trimmed := strings.Trim(strings.TrimSpace(text), ".")
                return regexp.MustCompile(`[\,\s]+et[\s]+|\,[\s]+|\s{2,}`).Split(trimmed, -1)
        }
        // keepGenres drops empty and known bogus tokens and title-cases the rest.
        keepGenres := func(candidates []string) []string {
                kept := make([]string, 0)
                for _, candidate := range candidates {
                        if candidate == "sketches" || candidate == "" || strings.Contains(candidate, "(France).") {
                                continue
                        }
                        kept = append(kept, strings.Title(candidate))
                }
                return kept
        }

        parts := strings.Split(strings.TrimSpace(rawHTML), "</time>")
        if len(parts) > 1 {
                return keepGenres(tokenize(parts[1])), nil
        }

        found := regexp.MustCompile(`[.*\s]*Sortie : .*\.[\s]*(.*)[.\s]*`).FindStringSubmatch(parts[0])
        if len(found) != 2 {
                return nil, nil
        }
        return keepGenres(tokenize(found[1])), nil
}
// parseDocument extracts every entry found on a collection or list page.
//
// For each item it reads the product ID, titles, authors, year, genres,
// comment, favorite flag and the user's rating. Items without a year or a
// rating keep the zero value for that field.
//
// It returns an error, and no entries, as soon as a year, a rating or the
// genres of an item cannot be parsed.
func parseDocument(document *goquery.Document) ([]*domain.Entry, error) {
        entries := make([]*domain.Entry, 0)

        // Where the user's rating lives depends only on the kind of page, so
        // decide once instead of once per item.
        ratingSelector := ".elco-collection-rating.user > a > div > span"
        if isList(document) {
                ratingSelector = ".elrua-useraction-inner"
        }

        var parseErr error
        document.Find(".elco-collection-item, .elli-item").EachWithBreak(func(i int, s *goquery.Selection) bool {
                id, _ := s.Find(".elco-collection-content > .elco-collection-poster, .elli-media figure").Attr("data-sc-product-id")
                entry := &domain.Entry{
                        ID:            id,
                        Title:         strings.TrimSpace(s.Find(".elco-title a").Text()),
                        OriginalTitle: strings.TrimSpace(s.Find(".elco-original-title").Text()),
                        Authors:       make([]string, 0, 5),
                }
                s.Find(".elco-product-detail a.elco-baseline-a, .elli-content a.elco-baseline-a").Each(func(i int, s *goquery.Selection) {
                        entry.Authors = append(entry.Authors, strings.TrimSpace(s.Text()))
                })

                parsedDate := strings.TrimSpace(s.Find(".elco-date").Text())
                // some works don't have year, for example Œdipe Roi
                // https://www.senscritique.com/mlcdf/collection/done/livres/all/all/all/all/all/all/list/page-1
                if parsedDate != "" {
                        // The year is wrapped in one character on each side;
                        // anything shorter cannot be sliced safely.
                        if len(parsedDate) < 2 {
                                parseErr = errors.Errorf("unexpected year %q for entry %s", parsedDate, id)
                                return false
                        }
                        year, err := strconv.Atoi(parsedDate[1 : len(parsedDate)-1])
                        if err != nil {
                                parseErr = errors.Wrapf(err, "invalid year %q for entry %s", parsedDate, id)
                                return false
                        }
                        entry.Year = year
                }

                genres, err := parseGenre(s)
                if err != nil {
                        parseErr = errors.Wrapf(err, "invalid genres for entry %s", id)
                        return false
                }
                entry.Genres = genres

                entry.Comment = strings.TrimSpace(s.Find(".elli-annotation-content").Text())
                entry.Favorite = s.Find(".eins-user-recommend").Length() != 0

                if ratingString := strings.TrimSpace(s.Find(ratingSelector).Text()); ratingString != "" {
                        rating, err := strconv.Atoi(ratingString)
                        if err != nil {
                                parseErr = errors.Wrapf(err, "invalid rating %q for entry %s", ratingString, id)
                                return false
                        }
                        entry.Rating = rating
                }

                entries = append(entries, entry)
                return true
        })
        if parseErr != nil {
                return nil, parseErr
        }
        return entries, nil
}
// collectionSize reads, from a collection page, the number of entries
// advertised for filter.
//
// The counter is expected to be a number wrapped in one character on each
// side. When the counter is absent, the size is 0 if the page shows the
// empty-collection placeholder and an error otherwise. A counter that is too
// short or not numeric is also an error.
func collectionSize(document *goquery.Document, filter string) (int, error) {
        selector := fmt.Sprintf("[data-sc-collection-filter=%s] span span", filter)
        counter := strings.TrimSpace(document.Find(selector).Text())
        if counter == "" {
                if document.Find(".elco-collection-item-empty").Length() > 0 {
                        return 0, nil
                }
                return 0, fmt.Errorf("error: failed to parsed nbOfEntries")
        }
        // Guard the slice below: a single character has no inner part.
        if len(counter) < 2 {
                return 0, fmt.Errorf("error: unexpected collection size %q", counter)
        }
        nbOfEntries, err := strconv.Atoi(counter[1 : len(counter)-1])
        if err != nil {
                return 0, err
        }
        return nbOfEntries, nil
}
// listSize reads the number of products advertised by a list page. A page
// without a counter counts as an empty list; a non-numeric counter is an
// error.
func listSize(document *goquery.Document) (int, error) {
        if counter := strings.TrimSpace(document.Find("[data-rel=list-products-count]").Text()); counter != "" {
                count, err := strconv.Atoi(counter)
                if err != nil {
                        return 0, err
                }
                return count, nil
        }
        return 0, nil
}
// listTitle returns the trimmed title of a list page, or an error when the
// page has no title.
func listTitle(document *goquery.Document) (string, error) {
        heading := document.Find(".d-heading1.elme-listTitle").Text()
        if title := strings.TrimSpace(heading); title != "" {
                return title, nil
        }
        return "", fmt.Errorf("title cannot be empty")
}
// listDescription returns the trimmed description of a list page. It is empty
// when the page has no "[data-rel=list-description]" element.
func listDescription(document *goquery.Document) string {
        return strings.TrimSpace(document.Find("[data-rel=list-description]").Text())
}
// extractPage downloads the page at url, parses it as HTML and hands the
// resulting document to parseF. The first failure among those three steps is
// returned, with no entries.
func extractPage(url string, parseF parseFunc) ([]*domain.Entry, error) {
        response, err := request(url)
        if err != nil {
                return nil, err
        }

        page, err := goquery.NewDocumentFromResponse(response)
        if err != nil {
                return nil, err
        }

        parsed, err := parseF(page)
        if err != nil {
                return nil, err
        }
        return parsed, nil
}
// List backs up the list found at url into back.
//
// The first page provides the title, the description, the advertised size and
// the first entries. The remaining pages, 30 entries each, are fetched through
// a pool of 20 workers. The backup fails when the number of entries collected
// differs from the advertised size.
func List(url string, back domain.Backend) error {
        res, err := request(url)
        if err != nil {
                return err
        }
        if err = back.Create(); err != nil {
                return err
        }

        document, err := goquery.NewDocumentFromResponse(res)
        if err != nil {
                return err
        }
        size, err := listSize(document)
        if err != nil {
                return errors.Wrapf(err, "%s", url)
        }
        title, err := listTitle(document)
        if err != nil {
                return errors.Wrapf(err, "%s", url)
        }
        firstPage, err := parseDocument(document)
        if err != nil {
                return err
        }
        list := domain.NewList(firstPage, title, listDescription(document))

        if lastPage := int(math.Ceil(float64(size) / 30)); lastPage > 1 {
                tasks := make([]*pool.Task, 0, lastPage-1)
                for page := 2; page <= lastPage; page++ {
                        // Declared inside the loop body, so each task captures its own address.
                        pageURL := makeListURL(url, page)
                        tasks = append(tasks, pool.NewTask(func() (interface{}, error) {
                                pageEntries, err := extractPage(pageURL, parseDocument)
                                if err != nil {
                                        return nil, err
                                }
                                return pageEntries, nil
                        }))
                }
                workers := pool.NewPool(tasks, 20)
                workers.Run()
                if list.Entries, err = workers.Merge(list.Entries); err != nil {
                        return err
                }
        }

        if found := len(list.Entries); found != size {
                return fmt.Errorf("the list '%s' has %d entries, but only %d were found", title, size, found)
        }
        return back.Save(list)
}
// Collection backs up the whole collection of username into back.
//
// After validating the user and preparing the backend, it reads the user's
// journal to learn when works were marked as done. Then, for every category
// and filter, it downloads all pages (18 entries each, pages after the first
// through a pool of 20 workers) and saves one collection per pair. Entries of
// the "done" filter get their done date from the journal.
//
// It stops at, and returns, the first error.
func Collection(username string, back domain.Backend) error {
        if err := validateUser(username); err != nil {
                return err
        }
        logging.Info("Backing up collection for user %s", username)
        // Without the output directory every later Save would fail.
        if err := back.Create(); err != nil {
                return err
        }
        dates, err := journal(username)
        if err != nil {
                return err
        }
        // Index the done dates by product ID for constant-time lookups. When a
        // product appears several times, the last journal row wins.
        doneDates := make(map[string]string, len(dates))
        for _, d := range dates {
                doneDates[d.ID] = d.DoneDate
        }
        for _, category := range Categories {
                for _, filter := range Filters {
                        url := makeCollectionURL(username, category, filter)
                        res, err := request(url)
                        if err != nil {
                                return err
                        }
                        document, err := goquery.NewDocumentFromResponse(res)
                        if err != nil {
                                return err
                        }
                        size, err := collectionSize(document, filter)
                        if err != nil {
                                return errors.Wrapf(err, "%s", url)
                        }
                        entries, err := parseDocument(document)
                        if err != nil {
                                return err
                        }
                        collection := domain.NewCollection(entries, category, filter, username)
                        nbOfPages := math.Ceil(float64(size) / 18)
                        if nbOfPages > 1 {
                                tasks := []*pool.Task{}
                                for i := 2; i <= int(nbOfPages); i++ {
                                        // Per-iteration copy so each task sees its own page number.
                                        i := i
                                        tasks = append(tasks, pool.NewTask(func() (interface{}, error) {
                                                entries, err := extractPage(url+strconv.Itoa(i), parseDocument)
                                                if err != nil {
                                                        return nil, err
                                                }
                                                return entries, nil
                                        }))
                                }
                                p := pool.NewPool(tasks, 20)
                                p.Run()
                                collection.Entries, err = p.Merge(collection.Entries)
                                if err != nil {
                                        return err
                                }
                        }
                        if filter == "done" {
                                for _, entry := range collection.Entries {
                                        if date, ok := doneDates[entry.ID]; ok {
                                                entry.DoneDate = date
                                        }
                                }
                        }
                        if err := back.Save(collection); err != nil {
                                return err
                        }
                }
        }
        return nil
}
// journal parses the journal of username and returns one entry, holding a
// product ID and its done date, per work found in it.
//
// The first page gives the total number of works; the following pages, 20
// works each, are fetched through a pool of 20 workers.
func journal(username string) ([]*domain.Entry, error) {
        base := URL + "/" + username + "/journal/all/all"
        res, err := request(base)
        if err != nil {
                return nil, err
        }
        document, err := goquery.NewDocumentFromResponse(res)
        if err != nil {
                return nil, err
        }
        size, err := journalSize(document)
        if err != nil {
                return nil, err
        }
        dated, err := extractDoneDate(document)
        if err != nil {
                return nil, err
        }

        lastPage := int(math.Ceil(float64(size) / 20))
        if lastPage <= 1 {
                return dated, nil
        }

        tasks := make([]*pool.Task, 0, lastPage-1)
        for page := 2; page <= lastPage; page++ {
                // Declared inside the loop body, so each task captures its own address.
                pageURL := base + "/all/page-" + strconv.Itoa(page) + ".ajax"
                tasks = append(tasks, pool.NewTask(func() (interface{}, error) {
                        pageEntries, err := extractPage(pageURL, extractDoneDate)
                        if err != nil {
                                return nil, err
                        }
                        return pageEntries, nil
                }))
        }
        workers := pool.NewPool(tasks, 20)
        workers.Run()
        dated, err = workers.Merge(dated)
        if err != nil {
                return nil, err
        }
        return dated, nil
}
// extractDoneDate reads a journal page and returns, for every work it lists,
// an entry carrying only the product ID and the date the work was done. The
// returned error is always nil.
func extractDoneDate(document *goquery.Document) ([]*domain.Entry, error) {
        dated := make([]*domain.Entry, 0)
        document.Find(".eldi-list-item").Each(func(_ int, item *goquery.Selection) {
                doneDate, hasDate := item.Attr("data-sc-datedone")
                if !hasDate {
                        // Not a work but a year or month heading: skip it.
                        return
                }
                item.Find(".eldi-collection-container").Each(func(_ int, container *goquery.Selection) {
                        rawID, hasID := container.Find(".eldi-collection-poster").Attr("data-sc-product-id")
                        if !hasID {
                                // Reached for TV series episodes, for example: skip them.
                                return
                        }
                        dated = append(dated, &domain.Entry{
                                ID:       strings.TrimSpace(rawID),
                                DoneDate: doneDate,
                        })
                })
        })
        return dated, nil
}
// journalSize adds up every ".elco-collection-count" counter of a journal
// page. Each non-empty counter is read as a number wrapped in one character on
// each side.
//
// NOTE(review): a counter that is not numeric terminates the process through
// log.Fatal instead of being returned; the returned error is always nil.
func journalSize(document *goquery.Document) (int, error) {
        total := 0
        document.Find(".elco-collection-count").Each(func(_ int, counter *goquery.Selection) {
                text := strings.TrimSpace(counter.Text())
                if text == "" {
                        return
                }
                n, err := strconv.Atoi(text[1 : len(text)-1])
                if err != nil {
                        log.Fatal(err)
                }
                total += n
        })
        return total, nil
}
		
		package domain
import (
        "fmt"
        "github.com/metal3d/go-slugify"
)
// Entry represents one work in a collection or a list: a movie, a series, a
// book, etc.
type Entry struct {
        // ID is the product identifier read from the page.
        ID            string   `json:"id"`
        Title         string   `json:"title"`
        OriginalTitle string   `json:"original_title,omitempty"`
        // Year is left at 0, and omitted from JSON, when the page gives none.
        Year          int      `json:"year,omitempty"`
        Authors       []string `json:"authors"`
        // Rating is left at 0, and omitted from JSON, when the work is unrated.
        Rating        int      `json:"rating,omitempty"`
        // DoneDate is the date taken from the user's journal, when known.
        DoneDate      string   `json:"done_date,omitempty"`
        Comment       string   `json:"comment,omitempty"`
        Favorite      bool     `json:"favorite"`
        Genres        []string `json:"genres,omitempty"`
}
// Compile-time check that *Collection implements Serializable.
var _ Serializable = (*Collection)(nil)
// Collection holds the entries of one category and filter of a user's
// collection.
type Collection struct {
        Entries  []*Entry `json:"entries"`
        Category string   `json:"category"`
        Filter   string   `json:"filter"`
        Username string   `json:"username"`
}
// NewCollection returns a Collection made of the given entries, category,
// filter and username.
func NewCollection(entries []*Entry, Category, Filter, Username string) *Collection {
        return &Collection{
                Entries:  entries,
                Category: Category,
                Filter:   Filter,
                Username: Username,
        }
}
// Slug returns "<category>-<filter>", an identifier for the collection.
func (c *Collection) Slug() string {
        return fmt.Sprintf("%s-%s", c.Category, c.Filter)
}
// CSV returns the entries to export as CSV records.
func (c *Collection) CSV() []*Entry {
        return c.Entries
}
// JSON returns the value to marshal for a JSON export: the collection itself.
func (c *Collection) JSON() interface{} {
        return c
}
// Compile-time check that *List implements Serializable.
var _ Serializable = (*List)(nil)
// List holds the entries of a list along with its title and description.
type List struct {
        Entries     []*Entry `json:"entries"`
        Title       string   `json:"title"`
        Description string   `json:"description,omitempty"`
}
// NewList returns a List made of the given entries, title and description.
func NewList(entries []*Entry, Title, Description string) *List {
        return &List{
                Entries:     entries,
                Title:       Title,
                Description: Description,
        }
}
// Slug returns an identifier derived from the list title by slugify.Marshal.
func (l *List) Slug() string {
        return slugify.Marshal(l.Title, true)
}
// CSV returns the entries to export as CSV records.
func (l *List) CSV() []*Entry {
        return l.Entries
}
// JSON returns the value to marshal for a JSON export: the list itself.
func (l *List) JSON() interface{} {
        return l
}
		
		package format
import (
        "encoding/csv"
        "io"
        "strconv"
        "strings"
        "go.mlcdf.fr/sc-backup/internal/domain"
)
// Compile-time check that *CSV implements domain.Formatter.
var _ domain.Formatter = (*CSV)(nil)
// CSV is a Formatter that exports entries as CSV records.
type CSV struct{}
// Ext returns the extension of CSV files, leading dot included.
func (f *CSV) Ext() string {
        return ".csv"
}
// Format writes one CSV record per entry of data to writer. Each record
// holds, in order: ID, title, original title, year, authors joined with ";",
// and rating. No header row is written.
func (f *CSV) Format(data domain.Serializable, writer io.Writer) error {
        rows := make([][]string, 0, len(data.CSV()))
        for _, e := range data.CSV() {
                rows = append(rows, []string{
                        e.ID,
                        e.Title,
                        e.OriginalTitle,
                        strconv.Itoa(e.Year),
                        strings.Join(e.Authors, ";"),
                        strconv.Itoa(e.Rating),
                })
        }
        return csv.NewWriter(writer).WriteAll(rows)
}
		
		package format
import (
        "encoding/json"
        "io"
        "go.mlcdf.fr/sc-backup/internal/domain"
)
// Compile-time check that *JSON implements domain.Formatter.
var _ domain.Formatter = (*JSON)(nil)
// JSON is a Formatter that exports data as a JSON document.
type JSON struct {
        // pretty selects indented output instead of compact output.
        pretty bool
}
// NewJSON returns a JSON formatter. When pretty is true the output is
// indented.
func NewJSON(pretty bool) *JSON {
        return &JSON{pretty}
}
// Ext returns the extension of JSON files, leading dot included.
func (f *JSON) Ext() string {
        return ".json"
}
// Format marshals the JSON view of data and writes it to writer in a single
// call. The output is indented with four spaces when the formatter is pretty
// and compact otherwise. Marshalling and write errors are returned.
func (f *JSON) Format(data domain.Serializable, writer io.Writer) error {
        payload := data.JSON()

        var (
                encoded []byte
                err     error
        )
        switch {
        case f.pretty:
                encoded, err = json.MarshalIndent(payload, "", "    ")
        default:
                encoded, err = json.Marshal(payload)
        }
        if err != nil {
                return err
        }

        _, err = writer.Write(encoded)
        return err
}
		
		package logging
import (
        "fmt"
        "os"
)
// isVerbose gates Debug output; it is off until EnableVerboseOutput is called.
var isVerbose bool
// EnableVerboseOutput turns on debug logging: later calls to Debug print
// their message.
func EnableVerboseOutput() {
        isVerbose = true
}
// Info prints an informational message to stderr, followed by a newline.
// Most messages should be logged at this level.
func Info(format string, v ...interface{}) {
        fmt.Fprintf(os.Stderr, format+"\n", v...)
}
// Debug prints a debug message to stderr, followed by a newline, but only
// when verbose output is enabled.
func Debug(format string, v ...interface{}) {
        if !isVerbose {
                return
        }
        fmt.Fprintf(os.Stderr, format+"\n", v...)
}
		
		package pool
import (
        "fmt"
        "sync"
        "go.mlcdf.fr/sc-backup/internal/domain"
)
// RunFunc is the work performed by a Task. It returns the task's result, or
// an error.
type RunFunc func() (interface{}, error)

// Task encapsulates a work item that should go in a work
// pool.
type Task struct {
        // Err holds an error that occurred during a task. Its
        // result is only meaningful after Run has been called
        // for the pool that holds it.
        Err error
        // Out holds the value returned by Func. Like Err, it is only
        // meaningful once the pool holding the task has run.
        Out interface{}
        // Func is the work to perform.
        Func RunFunc
}

// NewTask initializes a new task based on a given work
// function.
func NewTask(f RunFunc) *Task {
        return &Task{Func: f}
}

// Run runs a Task and does appropriate accounting via a
// given sync.WaitGroup.
func (t *Task) Run(wg *sync.WaitGroup) {
        t.Out, t.Err = t.Func()
        wg.Done()
}

// Pool is a worker group that runs a number of tasks at a
// configured concurrency.
type Pool struct {
        Tasks []*Task

        concurrency int
        tasksChan   chan *Task
        wg          sync.WaitGroup
}

// NewPool initializes a new pool with the given tasks and
// at the given concurrency.
func NewPool(tasks []*Task, concurrency int) *Pool {
        return &Pool{
                Tasks:       tasks,
                concurrency: concurrency,
                tasksChan:   make(chan *Task),
        }
}

// Run runs all work within the pool and blocks until it's
// finished.
//
// At most one worker per task is started, and at least one worker always is:
// the tasks channel is unbuffered, so with no worker to receive from it the
// first send would block forever. A concurrency of zero or less therefore
// runs the tasks one after the other.
func (p *Pool) Run() {
        workers := p.concurrency
        if workers > len(p.Tasks) {
                workers = len(p.Tasks)
        }
        if workers < 1 {
                workers = 1
        }
        for i := 0; i < workers; i++ {
                go p.work()
        }

        p.wg.Add(len(p.Tasks))
        for _, task := range p.Tasks {
                p.tasksChan <- task
        }

        // Closing the channel ends every worker's receive loop.
        close(p.tasksChan)

        p.wg.Wait()
}

// The work loop for any single goroutine.
func (p *Pool) work() {
        for task := range p.tasksChan {
                task.Run(&p.wg)
        }
}
// Merge appends the result of every task, in task order, to entries and
// returns the combined slice. It is meant to be called once Run has returned.
//
// It returns the error of the first failed task, or an error when a task
// produced something other than a slice of entries.
func (p *Pool) Merge(entries []*domain.Entry) ([]*domain.Entry, error) {
        merged := entries
        for _, t := range p.Tasks {
                if t.Err != nil {
                        return nil, t.Err
                }
                batch, isEntries := t.Out.([]*domain.Entry)
                if !isEntries {
                        return nil, fmt.Errorf("critical: failed to cast to []*Entry. Please open a bug report at https://go.mlcdf.fr/sc-backup")
                }
                merged = append(merged, batch...)
        }
        return merged, nil
}
		
		package main
import (
        "flag"
        "fmt"
        "log"
        "os"
        "path/filepath"
        "runtime/debug"
        "time"
        "go.mlcdf.fr/sc-backup/internal/backend"
        "go.mlcdf.fr/sc-backup/internal/backup"
        "go.mlcdf.fr/sc-backup/internal/domain"
        "go.mlcdf.fr/sc-backup/internal/format"
        "go.mlcdf.fr/sc-backup/internal/logging"
)
// usage is the help text written to stderr by flag.Usage.
const usage = `Usage:
    sc-backup --collection [USERNAME]
    sc-backup --list [URL]
Options:
    -c, --collection USERNAME   Backup a user's collection
    -l, --list URL              Backup a list
    -o, --output PATH           Directory at which to backup the data. Defaults to ./output
    -f, --format json|csv       Export format. Defaults to json
    -p, --pretty                Prettify the JSON exports
    -v, --verbose               Print verbose output
    -V, --version               Print version
Examples:
    sc-backup --collection mlcdf
    sc-backup --list https://www.senscritique.com/liste/Vu_au_cinema/363578
`
// Version can be set at link time to override debug.BuildInfo.Main.Version,
// which is "(devel)" when building from within the module. See
// golang.org/issue/29814 and golang.org/issue/29228.
// When it is empty, the version flag prints the build info version instead.
var Version string
// main parses the command line, then backs up either a user's collection or a
// single list with the requested formatter, and finally reports where the
// data was saved and how long the backup took.
//
// Without any argument it prints the usage and exits with status 0. Invalid
// flag combinations, an unknown format and backup failures end the program
// through log.Fatal.
func main() {
        log.SetFlags(0)
        // usage is plain text, not a format string: print it verbatim.
        flag.Usage = func() { fmt.Fprint(os.Stderr, usage) }
        if len(os.Args) == 1 {
                flag.Usage()
                os.Exit(0)
        }
        var (
                isVerboseFlag  bool
                listFlag       string
                collectionFlag string
                outputFlag     = "output"
                formatFlag     = "json"
                prettyFlag     bool
                versionFlag    bool
        )
        // Every option is registered twice: long name and one-letter alias.
        flag.BoolVar(&versionFlag, "version", versionFlag, "print the version")
        flag.BoolVar(&versionFlag, "V", versionFlag, "print the version")
        flag.BoolVar(&isVerboseFlag, "verbose", isVerboseFlag, "enable verbose output")
        flag.BoolVar(&isVerboseFlag, "v", isVerboseFlag, "enable verbose output")
        flag.StringVar(&listFlag, "list", listFlag, "Download list")
        flag.StringVar(&listFlag, "l", listFlag, "Download list")
        flag.StringVar(&collectionFlag, "collection", collectionFlag, "Download user collection")
        flag.StringVar(&collectionFlag, "c", collectionFlag, "Download user collection")
        flag.StringVar(&outputFlag, "output", outputFlag, "Output directory")
        flag.StringVar(&outputFlag, "o", outputFlag, "Output directory")
        flag.StringVar(&formatFlag, "format", formatFlag, "Output format. Either json or csv. Default to json.")
        flag.StringVar(&formatFlag, "f", formatFlag, "Output format. Either json or csv. Default to json.")
        flag.BoolVar(&prettyFlag, "pretty", prettyFlag, "Pretty output")
        flag.BoolVar(&prettyFlag, "p", prettyFlag, "Pretty output")
        flag.Parse()
        if versionFlag {
                // Prefer the link-time version, then the module build info.
                if Version != "" {
                        fmt.Println(Version)
                        return
                }
                if buildInfo, ok := debug.ReadBuildInfo(); ok {
                        fmt.Println(buildInfo.Main.Version)
                        return
                }
                fmt.Println("(unknown)")
                return
        }
        start := time.Now()
        if collectionFlag != "" && listFlag != "" {
                log.Fatalln("error: you can't set --list and --collection at the same time")
        }
        if collectionFlag == "" && listFlag == "" {
                log.Fatalln("error: at least one of --list or --collection is required")
        }
        if formatFlag == "csv" && prettyFlag {
                logging.Info("warning: -p/--pretty is useless with -f/--format csv. CSV won't be prettified.")
        }
        if isVerboseFlag {
                logging.EnableVerboseOutput()
        }
        var back domain.Backend
        var err error
        var formatter domain.Formatter
        switch formatFlag {
        case "json":
                formatter = format.NewJSON(prettyFlag)
        case "csv":
                formatter = &format.CSV{}
        default:
                // Only the formats handled above are advertised.
                log.Fatalf("invalid format %s: it should be json|csv", formatFlag)
        }
        // Exactly one of the two flags is set at this point. A collection gets
        // its own sub-directory named after the user.
        if collectionFlag != "" {
                back = backend.NewFS(filepath.Join(outputFlag, collectionFlag), formatter)
                err = backup.Collection(collectionFlag, back)
        }
        if listFlag != "" {
                back = backend.NewFS(outputFlag, formatter)
                err = backup.List(listFlag, back)
        }
        if err != nil {
                log.Fatalf("error: %s", err)
        }
        // Fall back to the path as given when it cannot be made absolute.
        to, err := filepath.Abs(back.Location())
        if err != nil {
                to = back.Location()
        }
        logging.Info("Saved to %s in %s", to, time.Since(start).Round(time.Millisecond).String())
}