1use super::Deviation;
2use super::Media;
3use once_cell::sync::Lazy;
4use regex::Regex;
5use std::collections::HashMap;
6use url::Url;
7
/// Error returned by [`ScrapedWebPageInfo::from_html_str`].
#[derive(Debug, thiserror::Error)]
pub enum FromHtmlStrError {
    /// The `window.__INITIAL_STATE__ = JSON.parse("...")` assignment was not
    /// found in the input.
    #[error("missing initial state")]
    MissingInitialState,

    /// The extracted state could not be deserialized; wraps the underlying
    /// `serde_json` error and forwards its message unchanged.
    #[error(transparent)]
    InvalidJson(#[from] serde_json::Error),
}
19
20#[derive(Debug, serde::Deserialize)]
22pub struct ScrapedWebPageInfo {
23 #[serde(rename = "@@config")]
25 pub config: Config,
26
27 #[serde(rename = "@@entities")]
29 pub entities: Option<Entities>,
30
31 #[serde(rename = "@@DUPERBROWSE")]
33 pub duper_browse: Option<DuperBrowse>,
34
35 #[serde(rename = "@@publicSession")]
37 pub public_session: PublicSession,
38
39 #[serde(rename = "@@streams")]
41 pub streams: Option<Streams>,
42
43 #[serde(rename = "csrfToken")]
47 pub csrf_token: Option<Box<str>>,
48
49 #[serde(rename = "gallectionSection")]
50 pub gallection_section: Option<GallectionSection>,
51
52 #[serde(rename = "luToken")]
54 pub lu_token: Option<Box<str>>,
55
56 #[serde(rename = "luToken2")]
58 pub lu_token2: Option<Box<str>>,
59
60 #[serde(flatten)]
62 pub unknown: HashMap<String, serde_json::Value>,
63}
64
65impl ScrapedWebPageInfo {
66 pub fn from_html_str(input: &str) -> Result<Self, FromHtmlStrError> {
68 static REGEX: Lazy<Regex> = Lazy::new(|| {
69 Regex::new(r#"window\.__INITIAL_STATE__ = JSON\.parse\("(.*)"\);"#).unwrap()
70 });
71
72 let capture = REGEX
73 .captures(input)
74 .and_then(|captures| captures.get(1))
75 .ok_or(FromHtmlStrError::MissingInitialState)?;
76 let capture = capture
78 .as_str()
79 .replace("\\\"", "\"")
80 .replace("\\'", "'")
81 .replace("\\\\", "\\");
82 Ok(serde_json::from_str(&capture)?)
83 }
84
85 pub fn is_logged_in(&self) -> bool {
87 self.public_session.is_logged_in
88 }
89
90 pub fn get_current_deviation_id(&self) -> Option<&serde_json::Value> {
92 Some(
93 &self
94 .duper_browse
95 .as_ref()?
96 .root_stream
97 .as_ref()?
98 .current_open_item,
99 )
100 }
101
102 pub fn get_current_deviation(&self) -> Option<&Deviation> {
104 let id = self.get_current_deviation_id()?;
105 let id = match id {
106 serde_json::Value::Number(n) => n.as_u64()?,
107 serde_json::Value::String(s) => s.parse().ok()?,
108 _ => return None,
109 };
110 self.get_deviation_by_id(id)
111 }
112
113 pub fn get_current_deviation_extended(&self) -> Option<&DeviationExtended> {
115 let id = self.get_current_deviation_id()?;
116 let mut key_buffer = itoa::Buffer::new();
117 let key = match id {
118 serde_json::Value::Number(n) => {
119 let n = n.as_u64()?;
120 key_buffer.format(n)
121 }
122 serde_json::Value::String(s) => s,
123 _ => return None,
124 };
125 self.entities
126 .as_ref()?
127 .deviation_extended
128 .as_ref()?
129 .get(key)
130 }
131
132 pub fn get_deviation_by_id(&self, id: u64) -> Option<&Deviation> {
134 let mut key_buffer = itoa::Buffer::new();
135 self.entities.as_ref()?.deviation.get(key_buffer.format(id))
136 }
137
138 pub fn take_deviation_by_id(&mut self, id: u64) -> Option<Deviation> {
140 let mut key_buffer = itoa::Buffer::new();
141 self.entities
142 .as_mut()?
143 .deviation
144 .remove(key_buffer.format(id))
145 }
146
147 pub fn get_current_folder_id(&self) -> Option<i64> {
149 Some(self.gallection_section.as_ref()?.selected_folder_id)
150 }
151
152 pub fn get_folder_deviations_stream(&self, folder_id: i64) -> Option<&WithOffsetStream> {
156 let key = format!("folder-deviations-gallery-{folder_id}");
157
158 self.streams
159 .as_ref()?
160 .streams
161 .get(&key)?
162 .as_with_offset_stream()
163 }
164
165 pub fn get_gallery_folder_entity(&self, folder_id: i64) -> Option<&GalleryFolder> {
167 self.entities
168 .as_ref()?
169 .gallery_folder
170 .as_ref()?
171 .get(itoa::Buffer::new().format(folder_id))
172 }
173
174 pub fn get_user_entity(&self, user_id: u64) -> Option<&User> {
176 self.entities
177 .as_ref()?
178 .user
179 .as_ref()?
180 .get(itoa::Buffer::new().format(user_id))
181 }
182}
183
184#[derive(Debug, serde::Deserialize)]
186pub struct Config {
187 #[serde(rename = "csrfToken")]
189 pub csrf_token: String,
190
191 #[serde(flatten)]
193 pub unknown: HashMap<String, serde_json::Value>,
194}
195
196#[derive(Debug, serde::Deserialize)]
198pub struct Entities {
199 pub deviation: HashMap<String, Deviation>,
201
202 #[serde(rename = "deviationExtended")]
204 pub deviation_extended: Option<HashMap<String, DeviationExtended>>,
205
206 #[serde(rename = "galleryFolder")]
208 pub gallery_folder: Option<HashMap<String, GalleryFolder>>,
209
210 pub user: Option<HashMap<String, User>>,
212
213 #[serde(flatten)]
215 pub unknown: HashMap<String, serde_json::Value>,
216}
217
218#[derive(Debug, serde::Deserialize)]
220pub struct DeviationExtended {
221 pub download: Option<Download>,
223
224 pub description: Option<String>,
226
227 #[serde(rename = "additionalMedia")]
229 pub additional_media: Option<Vec<AdditionalMedia>>,
230
231 #[serde(rename = "isDaProtected")]
233 pub is_da_protected: Option<bool>,
234
235 #[serde(flatten)]
237 pub unknown: HashMap<String, serde_json::Value>,
238}
239
240#[derive(Debug, serde::Deserialize)]
242pub struct GalleryFolder {
243 #[serde(rename = "folderId")]
247 pub folder_id: i64,
248
249 pub name: String,
251
252 pub owner: u64,
254
255 #[serde(flatten)]
257 pub unknown: HashMap<String, serde_json::Value>,
258}
259
260#[derive(Debug, serde::Deserialize)]
262pub struct User {
263 #[serde(rename = "userId")]
265 pub user_id: u64,
266
267 pub username: String,
269
270 #[serde(flatten)]
272 pub unknown: HashMap<String, serde_json::Value>,
273}
274
/// The `download` object of a [`DeviationExtended`] entry.
#[derive(Debug, serde::Deserialize)]
pub struct Download {
    /// The `filesize` value; presumably a size in bytes — TODO confirm.
    pub filesize: u64,

    /// The `height` value; presumably in pixels — TODO confirm.
    pub height: u32,

    /// The `width` value; presumably in pixels — TODO confirm.
    pub width: u32,

    /// The `type` string. Stored as `kind` because `type` is a Rust keyword.
    #[serde(rename = "type")]
    pub kind: String,

    /// The `url` value, parsed into a [`Url`].
    pub url: Url,

    /// Every other key, kept as raw JSON.
    #[serde(flatten)]
    pub unknown: HashMap<String, serde_json::Value>,
}
297
/// An element of the `additionalMedia` array of a [`DeviationExtended`] entry.
#[derive(Debug, serde::Deserialize)]
pub struct AdditionalMedia {
    /// The `media` object.
    pub media: Media,

    /// Every other key, kept as raw JSON.
    #[serde(flatten)]
    pub unknown: HashMap<String, serde_json::Value>,
}
307
308#[derive(Debug, serde::Deserialize)]
310pub struct DuperBrowse {
311 #[serde(rename = "rootStream")]
313 pub root_stream: Option<RootStream>,
314
315 #[serde(flatten)]
317 pub unknown: HashMap<String, serde_json::Value>,
318}
319
320#[derive(Debug, serde::Deserialize)]
322pub struct RootStream {
323 #[serde(rename = "currentOpenItem")]
325 pub current_open_item: serde_json::Value,
326
327 #[serde(flatten)]
329 pub unknown: HashMap<String, serde_json::Value>,
330}
331
332#[derive(Debug, serde::Deserialize)]
334pub struct PublicSession {
335 #[serde(rename = "isLoggedIn")]
337 pub is_logged_in: bool,
338
339 #[serde(flatten)]
341 pub unknown: HashMap<String, serde_json::Value>,
342}
343
/// The `@@streams` object of the page state.
#[derive(Debug, serde::Deserialize)]
pub struct Streams {
    /// The `@@BROWSE_PAGE_STREAM` object, if present.
    #[serde(rename = "@@BROWSE_PAGE_STREAM")]
    pub browse_page_stream: Option<BrowsePageStream>,

    /// All remaining keys, each deserialized as a [`Stream`] and stored under
    /// its original key (e.g. `folder-deviations-gallery-{id}`, as looked up by
    /// `ScrapedWebPageInfo::get_folder_deviations_stream`).
    #[serde(flatten)]
    pub streams: HashMap<String, Stream>,
}
357
/// A single entry of [`Streams::streams`], discriminated by its `streamType` key.
#[derive(Debug, serde::Deserialize)]
#[serde(tag = "streamType")]
pub enum Stream {
    /// A stream whose `streamType` is `WITH_OFFSET`.
    #[serde(rename = "WITH_OFFSET")]
    WithOffset(WithOffsetStream),

    /// Fallback for values that do not deserialize as a tagged variant; the
    /// value is kept as raw JSON.
    #[serde(untagged)]
    Unknown(serde_json::Value),
}
368
369impl Stream {
370 pub fn as_with_offset_stream(&self) -> Option<&WithOffsetStream> {
372 match self {
373 Self::WithOffset(stream) => Some(stream),
374 _ => None,
375 }
376 }
377}
378
379#[derive(Debug, serde::Deserialize)]
381pub struct WithOffsetStream {
382 pub items: Vec<u64>,
384
385 #[serde(rename = "itemsPerFetch")]
387 pub items_per_fetch: u32,
388
389 #[serde(rename = "hasMore")]
391 pub has_more: bool,
392
393 #[serde(rename = "hasLess")]
395 pub has_less: bool,
396
397 #[serde(flatten)]
399 pub unknown: HashMap<String, serde_json::Value>,
400}
401
402#[derive(Debug, serde::Deserialize)]
404pub struct BrowsePageStream {
405 pub cursor: String,
407
408 #[serde(rename = "hasLess")]
410 pub has_less: bool,
411
412 #[serde(rename = "hasMore")]
414 pub has_more: bool,
415
416 pub items: Vec<serde_json::Value>,
422
423 #[serde(rename = "itemsPerFetch")]
425 pub items_per_fetch: u64,
426
427 #[serde(rename = "streamParams")]
429 pub stream_params: StreamParams,
430
431 #[serde(rename = "streamType")]
433 pub stream_type: String,
434
435 #[serde(rename = "streamId")]
437 pub stream_id: String,
438
439 #[serde(rename = "fetchNextCallback")]
441 pub fetch_next_callback: String,
442
443 #[serde(flatten)]
445 pub unknown: HashMap<String, serde_json::Value>,
446}
447
448#[derive(Debug, serde::Deserialize)]
450pub struct StreamParams {
451 #[serde(rename = "requestParams")]
453 pub request_params: HashMap<String, String>,
454
455 #[serde(rename = "itemType")]
457 pub item_type: String,
458
459 #[serde(rename = "requestEndpoint")]
461 pub request_endpoint: String,
462
463 #[serde(rename = "initialOffset")]
465 pub initial_offset: u64,
466
467 #[serde(flatten)]
469 pub unknown: HashMap<String, serde_json::Value>,
470}
471
472#[derive(Debug, serde::Deserialize)]
474pub struct GallectionSection {
475 #[serde(rename = "currentPage")]
477 pub page: u64,
478
479 #[serde(rename = "selectedFolderId")]
481 pub selected_folder_id: i64,
482
483 #[serde(rename = "totalPages")]
485 pub total_pages: u64,
486
487 #[serde(flatten)]
489 pub unknown: HashMap<String, serde_json::Value>,
490}
491
#[cfg(test)]
mod test {
    use super::*;

    const SCRAPED_WEBPAGE: &str = include_str!("../../test_data/scraped_webpage.json");
    const LOGIN_WEBPAGE: &str = include_str!("../../test_data/login_webpage.json");

    /// The scraped-page fixture deserializes and exposes the expected current
    /// deviation id.
    #[test]
    fn parse_scraped_webpage() {
        let info = serde_json::from_str::<ScrapedWebPageInfo>(SCRAPED_WEBPAGE)
            .expect("failed to parse scraped webpage info");

        let current_id = info
            .get_current_deviation_id()
            .expect("missing current deviation id");

        assert_eq!(current_id, 119577071);
    }

    /// The login-page fixture deserializes without error.
    #[test]
    fn parse_login_webpage() {
        let _info = serde_json::from_str::<ScrapedWebPageInfo>(LOGIN_WEBPAGE)
            .expect("failed to parse scraped webpage info");
    }
}