Mam tak o:
// Sketch of the plain pipeline: every request is scraped, fetched, wrapped and consumed.
// Each stage must be a `map`: `foreach` returns Unit, so nothing could be chained after it.
val requests: Seq[Requests] = dbService.getRequests()
requests
  .map(scrapper.crape(_)) // :Seq[Document]
  .map(fetcher.fetch(_)) // :Seq[Response]
  .map(new ResultsDao(_)) // :Seq[ResultsDao]
  .foreach(consume(_))
OK, tylko chciałbym, by ten strumień miał też odpowiadający mu request na każdym etapie:
// Same pipeline, but every stage carries the originating request alongside its result.
// Pairs are destructured with `case (a, b) =>`: writing `(_._1, f(_._2))` would build a tuple
// of two independent lambdas instead of one function over the pair.
val requests: Seq[Requests] = dbService.getRequests()
requests
  .map(r => (r, scrapper.crape(r))) // :Seq[(Request, Document)]
  .map { case (req, doc) => (req, fetcher.fetch(doc)) } // :Seq[(Request, Response)]
  .map { case (req, resp) => (req, new ResultsDao(resp)) } // :Seq[(Request, ResultsDao)]
  .foreach { case (req, dao) => consume(req, dao) }
i już się chlew zrobił.
Postanowiłem więc zrobić na to klasę, która będzie przechowywać sobie request, a jak już miałem klasę, to dołożyłem buildera:
import net.ruippeixotog.scalascraper.model.Document
import org.slf4j.{Logger, LoggerFactory}
import com.typesafe.scalalogging.Logger
import scala.util.Random
/** Factories and the default implementation of the scrape -> fetch -> consume pipeline. */
object ScrapperManager {

  /**
   * Convenience overload for a pipeline with exactly one fetcher and one consumer.
   *
   * @param scrape  produces a document for a request under a given config
   * @param fetch   extracts a response from the scraped document
   * @param consume receives the request together with the fetched response
   * @param configs candidate configs the manager switches between
   */
  private[scrapper] def getInstance(scrape: (RequestCount, ScrapperConfig) => Document,
                                    fetch: Document => ResponseCount,
                                    consume: (RequestCount, ResponseCount) => Unit,
                                    configs: Seq[ScrapperConfig]): ScrapperManager =
    getInstance(scrape, Seq(fetch), Seq(consume), configs)

  /**
   * Creates a manager with several fetchers and consumers.
   *
   * @param scrape    produces a document for a request under a given config
   * @param fetchers  each one extracts a response from the scraped document
   * @param consumers `consumers(i)` receives the response produced by `fetchers(i)`
   * @param configs   candidate configs the manager switches between
   */
  private[scrapper] def getInstance(scrape: (RequestCount, ScrapperConfig) => Document,
                                    fetchers: Seq[Document => ResponseCount],
                                    consumers: Seq[(RequestCount, ResponseCount) => Unit],
                                    configs: Seq[ScrapperConfig]): ScrapperManager =
    new ManagerInner(scrape, fetchers, consumers, configs)

  /**
   * Stateful pipeline: `scrape` stores the request and document, `fetch` stores the responses,
   * and `consumeAll` hands every response to the consumer at the same index.
   *
   * The instance keeps the state of the most recent request in mutable fields.
   *
   * @throws IllegalArgumentException if `fetchers` and `consumers` differ in length,
   *                                  or if `configs` is empty
   */
  class ManagerInner(scrape: (RequestCount, ScrapperConfig) => Document,
                     fetchers: Seq[Document => ResponseCount],
                     consumers: Seq[(RequestCount, ResponseCount) => Unit],
                     configs: Seq[ScrapperConfig]) extends ScrapperManager with FetcherManager with ConsumerManager {

    // consumeAll pairs consumers with responses by position, so the lengths have to agree.
    require(fetchers.length == consumers.length,
      "fetchers and consumers must have the same length")
    // adjustConfig draws a random index into configs, which is impossible for an empty Seq.
    require(configs.nonEmpty, "at least one ScrapperConfig is required")

    // Fully qualified: the file imports a `Logger` from both slf4j and scala-logging,
    // so the bare name would be ambiguous here.
    val logger: com.typesafe.scalalogging.Logger =
      com.typesafe.scalalogging.Logger(LoggerFactory.getLogger(getClass))

    private var currentConfig: Option[ScrapperConfig] = None
    private var doc: Option[Document] = None
    private var responses: Option[Seq[ResponseCount]] = None
    private var request: Option[RequestCount] = None

    /** Remembers `request`, possibly switches config, and scrapes the document for it. */
    override def scrape(request: RequestCount): FetcherManager = {
      this.request = Option(request)
      val config = adjustConfig(currentConfig)
      currentConfig = Some(config)
      // Two-argument call: meant to invoke the scrape function passed to the constructor.
      doc = Option(scrape(request, config))
      this
    }

    /**
     * Runs every fetcher against the scraped document, keeping results in fetcher order.
     *
     * @throws IllegalStateException if no document has been scraped yet
     */
    override def fetch(): ConsumerManager = {
      val document = doc.getOrElse(
        throw new IllegalStateException("no document available: call scrape(request) before fetch()"))
      responses = Some(fetchers.map(fetcher => fetcher(document)))
      this
    }

    /**
     * Passes the stored request and the i-th response to the i-th consumer.
     *
     * @throws IllegalStateException if `scrape` or `fetch` has not been run yet
     */
    override def consumeAll(): Unit = {
      val currentRequest = request.getOrElse(
        throw new IllegalStateException("no request available: call scrape(request) before consumeAll()"))
      val fetched = responses.getOrElse(
        throw new IllegalStateException("no responses available: call fetch() before consumeAll()"))
      consumers.zip(fetched).foreach { case (consume, response) =>
        consume(currentRequest, response)
      }
    }

    /** The last scraped document, or `null` when nothing has been scraped yet. */
    override def getDoc: Document = doc.orNull

    /** The last fetched responses, or `null` when nothing has been fetched yet. */
    override def getResponses: Seq[ResponseCount] = responses.orNull

    /**
     * Keeps the current config unless there is none yet or its `changeProbability`
     * exceeds a fresh random draw; in those cases a random entry of `configs` is chosen.
     */
    private def adjustConfig(config: Option[ScrapperConfig]): ScrapperConfig = {
      val roll = Random.nextDouble()
      config match {
        case Some(current) if !(current.changeProbability > roll) => current
        case _ => configs(Random.nextInt(configs.length))
      }
    }
  }
}
/** Read access to a [[Document]] held by the implementor. */
trait DocsGetter { def getDoc: Document }
/** Read access to the sequence of responses held by the implementor. */
trait ResponsesGetter { def getResponses: Seq[ResponseCount] }
/** First pipeline stage: scraping a request yields the [[FetcherManager]] stage. */
trait ScrapperManager extends DocsGetter { def scrape(request: RequestCount): FetcherManager }
/** Second pipeline stage: fetching yields the [[ConsumerManager]] stage. */
trait FetcherManager extends DocsGetter with ResponsesGetter { def fetch(): ConsumerManager }
/** Final pipeline stage: `consumeAll` is the terminal, side-effecting step. */
trait ConsumerManager extends DocsGetter with ResponsesGetter { def consumeAll(): Unit }
-------------------------
import net.ruippeixotog.scalascraper.model.Document
/**
 * Staged builder for [[ScrapperManager]].
 *
 * Each step returns the trait of the next step, so the compiler enforces the order
 * scraper -> fetcher(s) -> consumer(s) -> configs -> build.
 */
object ScrapperManagerBuilder {

  /** Starts a new build; only the scraper step is reachable from the returned value. */
  def getInstance: WithScrapper = new ManagerBuilderInner

  /** Single mutable object implementing every builder step. */
  class ManagerBuilderInner() extends WithScrapper with WithFetcher with WithConsume with WithConfigs with Builder {
    // Not null-initialised: the class is public, so build() may be called on an instance
    // that never went through the staged steps.
    private var scrape: Option[(RequestCount, ScrapperConfig) => Document] = None
    private var fetchers: Seq[Document => ResponseCount] = Seq.empty
    private var consumers: Seq[(RequestCount, ResponseCount) => Unit] = Seq.empty
    private var configs: Seq[ScrapperConfig] = Seq.empty

    override def withScrapper(scrape: (RequestCount, ScrapperConfig) => Document): WithFetcher = {
      this.scrape = Option(scrape)
      this
    }

    /** Shorthand for `withFetchers` with a single fetcher. */
    override def withFetcher(fetch: Document => ResponseCount): WithConsume =
      withFetchers(Seq(fetch))

    override def withFetchers(fetchers: Seq[Document => ResponseCount]): WithConsume = {
      this.fetchers = fetchers
      this
    }

    /** Shorthand for `withConsumers` with a single consumer. */
    override def withConsumer(consume: (RequestCount, ResponseCount) => Unit): WithConfigs =
      withConsumers(Seq(consume))

    override def withConsumers(consumers: Seq[(RequestCount, ResponseCount) => Unit]): WithConfigs = {
      this.consumers = consumers
      this
    }

    override def withConfigs(configs: Seq[ScrapperConfig]): Builder = {
      this.configs = configs
      this
    }

    /**
     * Checks that the collected arguments can form a working manager.
     *
     * @throws IllegalStateException    if no scrape function has been set
     * @throws IllegalArgumentException if fetchers and consumers differ in length,
     *                                  or if no config was supplied
     */
    def validateArgs: Unit = {
      if (scrape.isEmpty) {
        throw new IllegalStateException("scrape function must be set before build()")
      }
      if (fetchers.length != consumers.length) {
        val msg = "fetchers and cousmer lengths must correspond to each other"
        throw new IllegalArgumentException(msg)
      }
      // The manager picks a random config per request, which cannot work with none.
      if (configs.isEmpty) {
        throw new IllegalArgumentException("at least one config must be provided")
      }
    }

    /**
     * Validates the collected arguments and creates the manager.
     *
     * @throws IllegalStateException    see `validateArgs`
     * @throws IllegalArgumentException see `validateArgs`
     */
    override def build(): ScrapperManager = {
      validateArgs
      scrape
        .map(scrapeFn => ScrapperManager.getInstance(scrapeFn, fetchers, consumers, configs))
        .getOrElse(throw new IllegalStateException("scrape function must be set before build()"))
    }
  }

  /** Step 1: supply the scrape function. */
  trait WithScrapper {
    def withScrapper(scrapper: (RequestCount, ScrapperConfig) => Document): WithFetcher
  }

  /** Step 2: supply one or more fetchers. */
  trait WithFetcher {
    def withFetcher(fetch: Document => ResponseCount): WithConsume
    def withFetchers(fetchers: Seq[Document => ResponseCount]): WithConsume
  }

  /** Step 3: supply one or more consumers, positionally matching the fetchers. */
  trait WithConsume {
    def withConsumer(consume: (RequestCount, ResponseCount) => Unit): WithConfigs
    def withConsumers(consumers: Seq[(RequestCount, ResponseCount) => Unit]): WithConfigs
  }

  /** Step 4: supply the candidate configs. */
  trait WithConfigs {
    def withConfigs(configs: Seq[ScrapperConfig]): Builder
  }

  /** Final step: create the manager. */
  trait Builder {
    def build(): ScrapperManager
  }
}
// Usage example: wire the pipeline through the staged builder and run it for every request.
// Lambda parameters need explicit types here, since a bare `val` gives no expected type.
val scrape = (request: RequestCount, config: ScrapperConfig) => new ScrapperCount().scrape(request, config)
val fetch = (doc: Document) => new FetcherCount().fetch(doc)
// NOTE(review): assumes getConsumeFun yields a single (RequestCount, ResponseCount) => Unit - confirm.
val consume = getConsumeFun // each element of consumers must correspond to the fetcher element in same order
val configs = getConfigs
val requests = getRequests
// The builder is staged, so the fetcher and consumer steps cannot be skipped.
val manager = ScrapperManagerBuilder.getInstance
  .withScrapper(scrape)
  .withFetcher(fetch)
  .withConsumer(consume)
  .withConfigs(configs)
  .build()
requests.foreach { request =>
  manager.scrape(request).fetch().consumeAll()
}
:(