use super::Deviation;
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashMap;
use url::Url;
/// Errors returned by [`ScrapedWebPageInfo::from_html_str`].
#[derive(Debug, thiserror::Error)]
pub enum FromHtmlStrError {
/// The `window.__INITIAL_STATE__ = JSON.parse("...");` assignment was not found in the input.
#[error("missing initial state")]
MissingInitialState,
/// The extracted and unescaped state text could not be deserialized by `serde_json`.
#[error(transparent)]
InvalidJson(#[from] serde_json::Error),
}
/// Top-level page state, deserialized from a JSON object.
///
/// Fields whose JSON key starts with `@@` carry an explicit rename; the remaining named fields
/// are read from the camelCase form of their Rust name (`csrfToken`, `luToken`, `luToken2`).
/// Every key that has no dedicated field is collected into `unknown`.
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ScrapedWebPageInfo {
    /// Value of the required `@@config` key.
    #[serde(rename = "@@config")]
    pub config: Config,

    /// Value of the `@@entities` key, if present.
    #[serde(rename = "@@entities")]
    pub entities: Option<Entities>,

    /// Value of the `@@DUPERBROWSE` key, if present.
    #[serde(rename = "@@DUPERBROWSE")]
    pub duper_browse: Option<DuperBrowse>,

    /// Value of the required `@@publicSession` key.
    #[serde(rename = "@@publicSession")]
    pub public_session: PublicSession,

    /// Value of the `@@streams` key, if present.
    #[serde(rename = "@@streams")]
    pub streams: Option<Streams>,

    /// Value of the `csrfToken` key, if present.
    pub csrf_token: Option<Box<str>>,

    /// Value of the `luToken` key, if present.
    pub lu_token: Option<Box<str>>,

    /// Value of the `luToken2` key, if present.
    pub lu_token2: Option<Box<str>>,

    /// All remaining keys of the object, kept as raw JSON values.
    #[serde(flatten)]
    pub unknown: HashMap<String, serde_json::Value>,
}
impl ScrapedWebPageInfo {
    /// Extracts and deserializes the page state embedded in an HTML document.
    ///
    /// Searches `input` for `window.__INITIAL_STATE__ = JSON.parse("...");`, undoes the `\"`,
    /// `\'` and `\\` escapes in the string argument (in that order), and deserializes the
    /// resulting text as JSON.
    ///
    /// # Errors
    ///
    /// Returns [`FromHtmlStrError::MissingInitialState`] when the assignment is not found, and
    /// [`FromHtmlStrError::InvalidJson`] when the unescaped text fails to deserialize.
    pub fn from_html_str(input: &str) -> Result<Self, FromHtmlStrError> {
        // Compiled once on first use. The capture group is greedy and `.` does not cross line
        // breaks, so it runs up to the last `");` on the line holding the assignment.
        static REGEX: Lazy<Regex> = Lazy::new(|| {
            Regex::new(r#"window\.__INITIAL_STATE__ = JSON\.parse\("(.*)"\);"#).unwrap()
        });

        let escaped = match REGEX.captures(input).and_then(|found| found.get(1)) {
            Some(group) => group.as_str(),
            None => return Err(FromHtmlStrError::MissingInitialState),
        };

        // Reverse the string-literal escaping so that plain JSON text remains.
        let json = escaped
            .replace("\\\"", "\"")
            .replace("\\'", "'")
            .replace("\\\\", "\\");

        let info = serde_json::from_str(&json)?;
        Ok(info)
    }

    /// Returns the `isLoggedIn` flag of the public session.
    pub fn is_logged_in(&self) -> bool {
        self.public_session.is_logged_in
    }

    /// Returns the raw `currentOpenItem` value of the root stream.
    ///
    /// Yields `None` when either `@@DUPERBROWSE` or its `rootStream` is absent.
    pub fn get_current_deviation_id(&self) -> Option<&serde_json::Value> {
        let browse = self.duper_browse.as_ref()?;
        let root_stream = browse.root_stream.as_ref()?;
        Some(&root_stream.current_open_item)
    }

    /// Looks up the deviation referenced by the current open item.
    ///
    /// The id may be a JSON number or a string holding a number; any other JSON type, a value
    /// that does not fit a `u64`, or an id without a matching entity yields `None`.
    pub fn get_current_deviation(&self) -> Option<&Deviation> {
        let numeric_id = match self.get_current_deviation_id()? {
            serde_json::Value::Number(number) => number.as_u64(),
            serde_json::Value::String(text) => text.parse::<u64>().ok(),
            _ => None,
        }?;
        self.get_deviation_by_id(numeric_id)
    }

    /// Looks up the extended data of the deviation referenced by the current open item.
    ///
    /// A numeric id is formatted as a decimal string to build the map key, while a string id
    /// is used as the key verbatim. Any other JSON type, missing entities, or a missing
    /// `deviationExtended` map yields `None`.
    pub fn get_current_deviation_extended(&self) -> Option<&DeviationExtended> {
        // Declared outside the `match` so the formatted key can borrow from it afterwards.
        let mut key_buffer = itoa::Buffer::new();
        let key: &str = match self.get_current_deviation_id()? {
            serde_json::Value::Number(number) => key_buffer.format(number.as_u64()?),
            serde_json::Value::String(text) => text.as_str(),
            _ => return None,
        };

        let entities = self.entities.as_ref()?;
        let extended = entities.deviation_extended.as_ref()?;
        extended.get(key)
    }

    /// Returns the deviation stored under the decimal string form of `id`, if any.
    pub fn get_deviation_by_id(&self, id: u64) -> Option<&Deviation> {
        let entities = self.entities.as_ref()?;
        let mut key_buffer = itoa::Buffer::new();
        let key = key_buffer.format(id);
        entities.deviation.get(key)
    }

    /// Removes and returns the deviation stored under the decimal string form of `id`, if any.
    pub fn take_deviation_by_id(&mut self, id: u64) -> Option<Deviation> {
        let entities = self.entities.as_mut()?;
        let mut key_buffer = itoa::Buffer::new();
        let key = key_buffer.format(id);
        entities.deviation.remove(key)
    }
}
/// The `@@config` object of [`ScrapedWebPageInfo`].
///
/// Named fields are read from the camelCase form of their Rust name.
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Config {
    /// Value of the required `csrfToken` key.
    pub csrf_token: String,

    /// All remaining keys of the object, kept as raw JSON values.
    #[serde(flatten)]
    pub unknown: HashMap<String, serde_json::Value>,
}
/// The `@@entities` object of [`ScrapedWebPageInfo`].
///
/// Named fields are read from the camelCase form of their Rust name.
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Entities {
    /// Value of the required `deviation` key: deviations keyed by string.
    ///
    /// The accessor methods on [`ScrapedWebPageInfo`] look entries up by the decimal string
    /// form of a numeric id.
    pub deviation: HashMap<String, Deviation>,

    /// Value of the `deviationExtended` key, if present.
    pub deviation_extended: Option<HashMap<String, DeviationExtended>>,

    /// All remaining keys of the object, kept as raw JSON values.
    #[serde(flatten)]
    pub unknown: HashMap<String, serde_json::Value>,
}
/// A value of the `deviationExtended` map in [`Entities`].
#[derive(Debug, serde::Deserialize)]
pub struct DeviationExtended {
/// Value of the `download` key, if present.
pub download: Option<Download>,
/// Value of the `description` key, if present.
pub description: Option<String>,
/// All remaining keys of the object, kept as raw JSON values.
#[serde(flatten)]
pub unknown: HashMap<String, serde_json::Value>,
}
/// The `download` object of a [`DeviationExtended`]. All named fields are required.
#[derive(Debug, serde::Deserialize)]
pub struct Download {
/// Value of the `filesize` key (presumably a size in bytes; the unit is not established here).
pub filesize: u64,
/// Value of the `height` key.
pub height: u32,
/// Value of the `width` key.
pub width: u32,
/// Value of the `type` key; named `kind` because `type` is a Rust keyword.
#[serde(rename = "type")]
pub kind: String,
/// Value of the `url` key, parsed as a URL during deserialization.
pub url: Url,
/// All remaining keys of the object, kept as raw JSON values.
#[serde(flatten)]
pub unknown: HashMap<String, serde_json::Value>,
}
/// The `@@DUPERBROWSE` object of [`ScrapedWebPageInfo`].
///
/// Named fields are read from the camelCase form of their Rust name.
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DuperBrowse {
    /// Value of the `rootStream` key, if present.
    pub root_stream: Option<RootStream>,

    /// All remaining keys of the object, kept as raw JSON values.
    #[serde(flatten)]
    pub unknown: HashMap<String, serde_json::Value>,
}
/// The `rootStream` object of [`DuperBrowse`].
///
/// Named fields are read from the camelCase form of their Rust name.
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RootStream {
    /// Value of the required `currentOpenItem` key, kept as raw JSON.
    ///
    /// The accessors on [`ScrapedWebPageInfo`] accept it as either a number or a string.
    pub current_open_item: serde_json::Value,

    /// All remaining keys of the object, kept as raw JSON values.
    #[serde(flatten)]
    pub unknown: HashMap<String, serde_json::Value>,
}
/// The `@@publicSession` object of [`ScrapedWebPageInfo`].
///
/// Named fields are read from the camelCase form of their Rust name.
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PublicSession {
    /// Value of the required `isLoggedIn` key.
    pub is_logged_in: bool,

    /// All remaining keys of the object, kept as raw JSON values.
    #[serde(flatten)]
    pub unknown: HashMap<String, serde_json::Value>,
}
/// The `@@streams` object of [`ScrapedWebPageInfo`].
#[derive(Debug, serde::Deserialize)]
pub struct Streams {
/// Value of the `@@BROWSE_PAGE_STREAM` key, if present.
#[serde(rename = "@@BROWSE_PAGE_STREAM")]
pub browse_page_stream: Option<BrowsePageStream>,
/// All remaining keys of the object, kept as raw JSON values.
#[serde(flatten)]
pub unknown: HashMap<String, serde_json::Value>,
}
/// The `@@BROWSE_PAGE_STREAM` object of [`Streams`].
///
/// Named fields are all required and are read from the camelCase form of their Rust name.
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct BrowsePageStream {
    /// Value of the `cursor` key.
    pub cursor: String,

    /// Value of the `hasLess` key.
    pub has_less: bool,

    /// Value of the `hasMore` key.
    pub has_more: bool,

    /// Value of the `items` key; each element is kept as raw JSON.
    pub items: Vec<serde_json::Value>,

    /// Value of the `itemsPerFetch` key.
    pub items_per_fetch: u64,

    /// Value of the `streamParams` key.
    pub stream_params: StreamParams,

    /// Value of the `streamType` key.
    pub stream_type: String,

    /// Value of the `streamId` key.
    pub stream_id: String,

    /// Value of the `fetchNextCallback` key.
    pub fetch_next_callback: String,

    /// All remaining keys of the object, kept as raw JSON values.
    #[serde(flatten)]
    pub unknown: HashMap<String, serde_json::Value>,
}
/// The `streamParams` object of [`BrowsePageStream`].
///
/// Named fields are all required and are read from the camelCase form of their Rust name.
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct StreamParams {
    /// Value of the `requestParams` key: a string-to-string map.
    pub request_params: HashMap<String, String>,

    /// Value of the `itemType` key.
    pub item_type: String,

    /// Value of the `requestEndpoint` key.
    pub request_endpoint: String,

    /// Value of the `initialOffset` key.
    pub initial_offset: u64,

    /// All remaining keys of the object, kept as raw JSON values.
    #[serde(flatten)]
    pub unknown: HashMap<String, serde_json::Value>,
}
#[cfg(test)]
mod test {
    use super::*;

    // JSON fixtures embedded at compile time.
    const SCRAPED_WEBPAGE: &str = include_str!("../../test_data/scraped_webpage.json");
    const LOGIN_WEBPAGE: &str = include_str!("../../test_data/login_webpage.json");

    /// The scraped-page fixture deserializes and exposes the expected current deviation id.
    #[test]
    fn parse_scraped_webpage() {
        let info = serde_json::from_str::<ScrapedWebPageInfo>(SCRAPED_WEBPAGE)
            .expect("failed to parse scraped webpage info");
        let current_id = info
            .get_current_deviation_id()
            .expect("missing current deviation id");
        assert_eq!(current_id, 119577071);
    }

    /// The login-page fixture deserializes without error.
    #[test]
    fn parse_login_webpage() {
        serde_json::from_str::<ScrapedWebPageInfo>(LOGIN_WEBPAGE)
            .expect("failed to parse scraped webpage info");
    }
}