Dodanie request do strumienia z responsem

1

Mam tak o:

val requests: Seq[Requests] = dbService.getRequests()

// The first stage must be `map`: `foreach` returns Unit, so nothing could be chained after it.
requests.map( scrapper.crape(_) )        // :Seq[Document]
      .map( fetcher.fetch(_) )          // :Seq[Response]
      .map( new ResultsDao(_) )    // :Seq[ResultsDao]
      .foreach( consume(_) )

OK, tylko chciałbym, by ten strumień miał też odpowiadający mu request na każdym etapie:

val requests: Seq[Requests] = dbService.getRequests()

// Each stage carries the originating request alongside the intermediate value.
// `map` is used for the first stage because `foreach` returns Unit, and tuples are
// destructured with `case` because every `_` placeholder would be a separate parameter.
requests.map( r => (r, scrapper.crape(r)) )        // :Seq[(Request, Document)]
      .map { case (r, doc) => (r, fetcher.fetch(doc)) }          // :Seq[(Request, Response)]
      .map { case (r, response) => (r, new ResultsDao(response)) }    // :Seq[(Request, ResultsDao)]
      .foreach { case (r, dao) => consume(r, dao) }

i już się chlew zrobił.
Postanowiłem więc zrobić do tego klasę, która będzie przechowywać request, a skoro miałem już klasę, to dołożyłem do niej buildera:

import net.ruippeixotog.scalascraper.model.Document
import org.slf4j.{Logger, LoggerFactory}
import com.typesafe.scalalogging.Logger


import scala.util.Random

/**
 * Factory for the staged scrape -> fetch -> consume pipeline.
 *
 * The pipeline is implemented by [[ScrapperManager.ManagerInner]], a single mutable
 * object that is returned (as `this`) from every stage.
 */
object ScrapperManager {

  /**
   * Convenience overload for a pipeline with exactly one fetcher and one consumer.
   *
   * @param scrape  produces a document for a request under a given config
   * @param fetch   extracts a response from the scraped document
   * @param consume receives the request together with the fetched response
   * @param configs configs to pick from when scraping
   */
  private[scrapper] def getInstance(scrape: (RequestCount, ScrapperConfig) => Document,
                                    fetch: Document => ResponseCount,
                                    consume: (RequestCount, ResponseCount) => Unit,
                                    configs: Seq[ScrapperConfig]): ScrapperManager = {
    this.getInstance(scrape, Seq(fetch), Seq(consume), configs)
  }

  /**
   * Creates a pipeline with several fetchers and consumers.
   *
   * Consumers are paired with fetchers by position: consumer `i` receives the response
   * produced by fetcher `i`.
   */
  private[scrapper] def getInstance(scrape: (RequestCount, ScrapperConfig) => Document,
                                    fetchers: Seq[Document => ResponseCount],
                                    consumers: Seq[(RequestCount, ResponseCount) => Unit],
                                    configs: Seq[ScrapperConfig]): ScrapperManager = {
    new ManagerInner(scrape, fetchers, consumers, configs)
  }

  /**
   * Pipeline implementation. All three stages are the same mutable instance, so a later
   * `scrape` call overwrites the request, document and config held by earlier stage references.
   */
  class ManagerInner(scrape: (RequestCount, ScrapperConfig) => Document,
                     fetchers: Seq[Document => ResponseCount],
                     consumers: Seq[(RequestCount, ResponseCount) => Unit],
                     configs: Seq[ScrapperConfig]) extends ScrapperManager with FetcherManager with ConsumerManager {

    // Fully qualified on purpose: the file imports both org.slf4j.Logger and
    // com.typesafe.scalalogging.Logger, which makes the bare name `Logger` ambiguous.
    val logger: com.typesafe.scalalogging.Logger =
      com.typesafe.scalalogging.Logger(LoggerFactory.getLogger(this.getClass))

    // Stage state; each field stays null until the stage that fills it has run.
    private var currentConfig: ScrapperConfig = null
    private var doc: Document = null
    private var responses: Seq[ResponseCount] = null
    private var request: RequestCount = null

    /** Remembers the request, possibly switches config, and scrapes the document. */
    override def scrape(request: RequestCount): FetcherManager = {
      this.request = request
      currentConfig = adjustConfig(currentConfig)
      this.doc = scrape(request, currentConfig)
      this
    }

    /** Runs every fetcher, in order, against the current document and stores the responses. */
    override def fetch(): ConsumerManager = {
      // Built strictly and as a Vector so that the positional lookup in consumeAll is cheap.
      this.responses = fetchers.iterator.map(fetcher => fetcher(doc)).toVector
      this
    }

    /** Hands each consumer the current request and the response at the same position. */
    override def consumeAll(): Unit =
      consumers.zipWithIndex.foreach { case (consume, i) => consume(request, responses(i)) }

    /** @return the document from the most recent `scrape`, or null before the first one */
    override def getDoc: Document = this.doc

    /** @return the responses from the most recent `fetch`, or null before the first one */
    override def getResponses: Seq[ResponseCount] = this.responses

    /**
     * Keeps `config`, or picks a random one from `configs` when there is no config yet
     * or when `config.changeProbability` exceeds a fresh random draw.
     *
     * @throws IllegalArgumentException if a config has to be picked but `configs` is empty
     */
    private def adjustConfig(config: ScrapperConfig): ScrapperConfig = {
      val r = Random.nextDouble()
      if (config == null || config.changeProbability > r) {
        // Random.nextInt(0) would throw the same exception type with a less helpful message.
        require(configs.nonEmpty, "at least one ScrapperConfig is required")
        configs(Random.nextInt(configs.length))
      } else {
        config
      }
    }

  }

}

/**
 * Read access to a `Document`.
 *
 * In `ScrapperManager.ManagerInner` this is the document stored by the most recent
 * `scrape` call (null before the first call).
 */
trait DocsGetter {
  def getDoc: Document
}

/**
 * Read access to a sequence of `ResponseCount`.
 *
 * In `ScrapperManager.ManagerInner` these are the responses stored by the most recent
 * `fetch()` call (null before the first call).
 */
trait ResponsesGetter {
  def getResponses: Seq[ResponseCount]
}

/** First pipeline stage: scraping a request yields the fetch stage. */
trait ScrapperManager extends DocsGetter {
  /** Scrapes `request` and returns the stage on which `fetch()` can be called. */
  def scrape(request: RequestCount): FetcherManager
}

/** Second pipeline stage: fetching yields the consume stage. */
trait FetcherManager extends DocsGetter with ResponsesGetter {
  /** Runs the fetch step and returns the stage on which `consumeAll()` can be called. */
  def fetch(): ConsumerManager
}

/** Final pipeline stage: passes the fetched responses to the consumers. */
trait ConsumerManager extends DocsGetter with ResponsesGetter {
  /** Runs the consume step; returns nothing. */
  def consumeAll(): Unit
}









-------------------------






import net.ruippeixotog.scalascraper.model.Document

/**
 * Staged builder for [[ScrapperManager]].
 *
 * Each `with...` call returns the trait of the next step, so the compiler enforces the order
 * scrapper -> fetcher(s) -> consumer(s) -> configs -> build.
 */
object ScrapperManagerBuilder {

  /** Starts a new build; the only operation available on the result is `withScrapper`. */
  def getInstance: WithScrapper = {
    new ManagerBuilderInner
  }

  /**
   * Mutable builder implementing every step. Each step stores its argument and returns
   * `this` typed as the next step.
   */
  class ManagerBuilderInner() extends WithScrapper with WithFetcher with WithConsume with WithConfigs with Builder {

    // Null until the corresponding step has been called.
    private var scrape: (RequestCount, ScrapperConfig) => Document = null
    private var fetchers: Seq[Document => ResponseCount] = null
    private var consumers: Seq[(RequestCount, ResponseCount) => Unit] = null
    private var configs: Seq[ScrapperConfig] = null

    override def withScrapper(scrape: (RequestCount, ScrapperConfig) => Document): WithFetcher = {
      this.scrape = scrape
      this
    }

    /** Single-fetcher shortcut for [[withFetchers]]. */
    override def withFetcher(fetch: Document => ResponseCount): WithConsume = {
      withFetchers(Seq(fetch))
    }

    override def withFetchers(fetchers: Seq[Document => ResponseCount]): WithConsume = {
      this.fetchers = fetchers
      this
    }

    /** Single-consumer shortcut for [[withConsumers]]. */
    override def withConsumer(consume: (RequestCount, ResponseCount) => Unit): WithConfigs = {
      withConsumers(Seq(consume))
    }

    override def withConsumers(consumers: Seq[(RequestCount, ResponseCount) => Unit]): WithConfigs = {
      this.consumers = consumers
      this
    }

    override def withConfigs(configs: Seq[ScrapperConfig]): Builder = {
      this.configs = configs
      this
    }

    /**
     * Checks that fetchers and consumers can be paired by position.
     *
     * @throws IllegalStateException    if fetchers or consumers were never set (possible only
     *                                  when this class is used directly, bypassing the steps)
     * @throws IllegalArgumentException if the two sequences differ in length
     */
    def validateArgs: Unit = {
      if (fetchers == null || consumers == null) {
        throw new IllegalStateException("fetchers and consumers must be set before build()")
      }
      if (fetchers.length != consumers.length) {
        val msg = "fetchers and consumers lengths must correspond to each other"
        throw new IllegalArgumentException(msg)
      }
    }

    /** Validates the collected arguments and creates the manager. */
    override def build(): ScrapperManager = {
      validateArgs
      ScrapperManager.getInstance(scrape, fetchers, consumers, configs)
    }
  }

  /** Step 1: choose the scraping function. */
  trait WithScrapper {
    def withScrapper(scrapper: (RequestCount, ScrapperConfig) => Document): WithFetcher
  }

  /** Step 2: choose one fetcher or a sequence of fetchers. */
  trait WithFetcher {
    def withFetcher(fetch: Document => ResponseCount): WithConsume
    def withFetchers(fetchers: Seq[Document => ResponseCount]): WithConsume
  }

  /** Step 3: choose one consumer or a sequence of consumers (same length as the fetchers). */
  trait WithConsume {
    def withConsumer(consume: (RequestCount, ResponseCount) => Unit): WithConfigs

    def withConsumers(consumers: Seq[(RequestCount, ResponseCount) => Unit]): WithConfigs
  }

  /** Step 4: supply the scrapper configs. */
  trait WithConfigs {
    def withConfigs(configs: Seq[ScrapperConfig]): Builder
  }

  /** Final step: create the manager. */
  trait Builder {
    def build(): ScrapperManager
  }

}
// Function literals bound to a plain `val` need explicit parameter types.
val scrape = (request: RequestCount, config: ScrapperConfig) => new ScrapperCount().scrape(request, config)
val fetch = (doc: Document) => new FetcherCount().fetch(doc)
// NOTE(review): assumes getConsumeFun returns a single (RequestCount, ResponseCount) => Unit;
// use withConsumers instead if it returns a Seq.
val consume = getConsumeFun // each element of consumers must correspond to the fetcher element in same order
val configs = getConfigs
val requests = getRequests

// The builder is staged: withScrapper -> withFetcher(s) -> withConsumer(s) -> withConfigs -> build.
val manager = ScrapperManagerBuilder.getInstance
  .withScrapper(scrape)
  .withFetcher(fetch)
  .withConsumer(consume)
  .withConfigs(configs)
  .build()

requests.foreach(request => manager.scrape(request).fetch().consumeAll())

title

:(

1

Po co pchasz gettery w stylu Javy do Scali?

// Java-style accessor name (get-prefix).
trait DocsGetter {
  def getDoc: Document
}

nie lepiej?

// Scala-style accessor: a parameterless method named after the value it returns.
trait DocsGetter {
  def doc: Document
}

To też nadmiarowe:

    // Hand-written accessor that only returns the `doc` field.
    override def getDoc: Document = {
      this.doc
    }

Przecież Scala sama generuje gettery. Jak chcesz gettery w stylu Javy, to trzeba dodać adnotację:

// @BeanProperty generates getDoc/setDoc; the compiler rejects it on private fields,
// so the field must not be private.
@scala.beans.BeanProperty
var doc: Document = null

Dalej: Scala to nie Java i traity mogą mieć pola, więc można napisać:

// Trait with a field and a concrete accessor built on top of it.
trait DocsGetter {
  // No initializer, so this var is abstract: the implementing class must define it.
  protected var doc: Document 
  def getDoc: Document = doc
}
0

A może zamiast osobno przetwarzać Request i Response, zrobić obiekt, który zawierałby oba? Tylko jak go nazwać? Envelope?

1

Spróbuj to sobie wszystko napisać w jednej funkcji, żeby było jak najmniej kodu (dobierz przy tym jak najlepsze nazwy). Wtedy zobaczysz, co i jak, i ewentualnie rozsądnie to podzielisz. Mam wrażenie, że wpadłeś trochę w pułapkę OOP, tj. rozbijania wszystkiego na klasy i metody, co nie zawsze jest dobrą drogą i często tylko niepotrzebnie komplikuje kod.

0

Ewentualnie może tak:

    // Collects one (request, response) pair per request, then hands them all to the consumer.
    var responseAndRequest: Seq[(RequestCount, ResponseCount)] = Seq()
    for (request <- requests) {
      val doc = manager.scrape(request)
      val response = fetcher.fetch(doc)
      // Double parentheses: `:+ (a, b)` parses as a two-argument call and would only
      // compile through argument auto-tupling.
      responseAndRequest = responseAndRequest :+ ((request, response))
    }
    val dao = new ResultsDao(responseAndRequest)
    consume(dao)

ale mało nowocześnie

1
Julian_ napisał(a):

Ewentualnie może tak:

    // Collects one (request, response) pair per request, then hands them all to the consumer.
    var responseAndRequest: Seq[(RequestCount, ResponseCount)] = Seq()
    for (request <- requests) {
      val doc = manager.scrape(request)
      val response = fetcher.fetch(doc)
      // Double parentheses: `:+ (a, b)` parses as a two-argument call and would only
      // compile through argument auto-tupling.
      responseAndRequest = responseAndRequest :+ ((request, response))
    }
    val dao = new ResultsDao(responseAndRequest)
    consume(dao)

ale mało nowocześnie

To polecam for-yield (inspirowane haskellową notacją do), by pozbyć się var:

  // One (request, response) pair per request, built without any mutable accumulator.
  val responseAndRequest: Seq[(RequestCount, ResponseCount)] =
    for (request <- requests) yield {
      val doc = manager.scrape(request)
      (request, fetcher.fetch(doc))
    }

Chyba też zadziała:

  // Value bindings inside a for-comprehension are written without `val`
  // (the `val` form is deprecated and rejected by newer compilers).
  val responseAndRequest: Seq[(RequestCount, ResponseCount)] = for {
    request <- requests
    doc = manager.scrape(request)
    response = fetcher.fetch(doc)
  } yield (request, response)

1 użytkowników online, w tym zalogowanych: 0, gości: 1