1use super::Deviation;
2use super::Media;
3use once_cell::sync::Lazy;
4use regex::Regex;
5use std::collections::HashMap;
6use url::Url;
7
8#[derive(Debug, thiserror::Error)]
10pub enum FromHtmlStrError {
11 #[error("missing initial state")]
13 MissingInitialState,
14
15 #[error(transparent)]
17 InvalidJson(#[from] serde_json::Error),
18}
19
20#[derive(Debug, serde::Deserialize)]
22pub struct ScrapedWebPageInfo {
23 #[serde(rename = "@@config")]
25 pub config: Config,
26
27 #[serde(rename = "@@entities")]
29 pub entities: Option<Entities>,
30
31 #[serde(rename = "@@DUPERBROWSE")]
33 pub duper_browse: Option<DuperBrowse>,
34
35 #[serde(rename = "@@publicSession")]
37 pub public_session: PublicSession,
38
39 #[serde(rename = "@@streams")]
41 pub streams: Option<Streams>,
42
43 #[serde(rename = "csrfToken")]
47 pub csrf_token: Option<Box<str>>,
48
49 #[serde(rename = "gallectionSection")]
50 pub gallection_section: Option<GallectionSection>,
51
52 #[serde(rename = "luToken")]
54 pub lu_token: Option<Box<str>>,
55
56 #[serde(rename = "luToken2")]
58 pub lu_token2: Option<Box<str>>,
59
60 #[serde(flatten)]
62 pub unknown: HashMap<String, serde_json::Value>,
63}
64
65impl ScrapedWebPageInfo {
66 pub fn from_html_str(input: &str) -> Result<Self, FromHtmlStrError> {
68 static REGEX: Lazy<Regex> = Lazy::new(|| {
69 Regex::new(r#"window\.__INITIAL_STATE__ = JSON\.parse\("(.*)"\);"#).unwrap()
70 });
71
72 let capture = REGEX
73 .captures(input)
74 .and_then(|captures| captures.get(1))
75 .ok_or(FromHtmlStrError::MissingInitialState)?;
76 let capture = capture
78 .as_str()
79 .replace("\\\"", "\"")
80 .replace("\\'", "'")
81 .replace("\\\\", "\\");
82 Ok(serde_json::from_str(&capture)?)
83 }
84
85 pub fn is_logged_in(&self) -> bool {
87 self.public_session.is_logged_in
88 }
89
90 pub fn get_current_deviation_id(&self) -> Option<&serde_json::Value> {
92 Some(
93 &self
94 .duper_browse
95 .as_ref()?
96 .root_stream
97 .as_ref()?
98 .current_open_item,
99 )
100 }
101
102 pub fn get_current_deviation(&self) -> Option<&Deviation> {
104 let id = self.get_current_deviation_id()?;
105 let id = match id {
106 serde_json::Value::Number(n) => n.as_u64()?,
107 serde_json::Value::String(s) => s.parse().ok()?,
108 _ => return None,
109 };
110 self.get_deviation_by_id(id)
111 }
112
113 pub fn get_current_deviation_extended(&self) -> Option<&DeviationExtended> {
115 let id = self.get_current_deviation_id()?;
116 let mut key_buffer = itoa::Buffer::new();
117 let key = match id {
118 serde_json::Value::Number(n) => {
119 let n = n.as_u64()?;
120 key_buffer.format(n)
121 }
122 serde_json::Value::String(s) => s,
123 _ => return None,
124 };
125 self.entities
126 .as_ref()?
127 .deviation_extended
128 .as_ref()?
129 .get(key)
130 }
131
132 pub fn get_deviation_by_id(&self, id: u64) -> Option<&Deviation> {
134 let mut key_buffer = itoa::Buffer::new();
135 self.entities.as_ref()?.deviation.get(key_buffer.format(id))
136 }
137
138 pub fn take_deviation_by_id(&mut self, id: u64) -> Option<Deviation> {
140 let mut key_buffer = itoa::Buffer::new();
141 self.entities
142 .as_mut()?
143 .deviation
144 .remove(key_buffer.format(id))
145 }
146
147 pub fn get_current_folder_id(&self) -> Option<i64> {
149 Some(self.gallection_section.as_ref()?.selected_folder_id)
150 }
151
152 pub fn get_folder_deviations_stream(&self, folder_id: i64) -> Option<&WithOffsetStream> {
156 let key = format!("folder-deviations-gallery-{folder_id}");
157
158 self.streams
159 .as_ref()?
160 .streams
161 .get(&key)?
162 .as_with_offset_stream()
163 }
164
165 pub fn get_gallery_folder_entity(&self, folder_id: i64) -> Option<&GalleryFolder> {
167 self.entities
168 .as_ref()?
169 .gallery_folder
170 .as_ref()?
171 .get(itoa::Buffer::new().format(folder_id))
172 }
173
174 pub fn get_user_entity(&self, user_id: u64) -> Option<&User> {
176 self.entities
177 .as_ref()?
178 .user
179 .as_ref()?
180 .get(itoa::Buffer::new().format(user_id))
181 }
182}
183
184#[derive(Debug, serde::Deserialize)]
186pub struct Config {
187 #[serde(rename = "csrfToken")]
189 pub csrf_token: String,
190
191 #[serde(flatten)]
193 pub unknown: HashMap<String, serde_json::Value>,
194}
195
196#[derive(Debug, serde::Deserialize)]
198pub struct Entities {
199 pub deviation: HashMap<String, Deviation>,
201
202 #[serde(rename = "deviationExtended")]
204 pub deviation_extended: Option<HashMap<String, DeviationExtended>>,
205
206 #[serde(rename = "galleryFolder")]
208 pub gallery_folder: Option<HashMap<String, GalleryFolder>>,
209
210 pub user: Option<HashMap<String, User>>,
212
213 #[serde(flatten)]
215 pub unknown: HashMap<String, serde_json::Value>,
216}
217
218#[derive(Debug, serde::Deserialize)]
220pub struct DeviationExtended {
221 pub download: Option<Download>,
223
224 pub description: Option<String>,
226
227 #[serde(rename = "additionalMedia")]
229 pub additional_media: Option<Vec<AdditionalMedia>>,
230
231 #[serde(rename = "parentDeviationEntityId")]
233 pub parent_deviation_entity_id: u64,
234
235 #[serde(flatten)]
237 pub unknown: HashMap<String, serde_json::Value>,
238}
239
240impl DeviationExtended {
241 pub fn can_download_additional_media(&self) -> bool {
248 let additional_media = match self.additional_media.as_ref() {
250 Some(additional_media) => additional_media,
251 None => return false,
252 };
253
254 if self.parent_deviation_entity_id < 1184619292 {
257 return true;
258 }
259
260 additional_media
263 .iter()
264 .all(|entry| entry.media.token.len() > 1)
265 }
266}
267
268#[derive(Debug, serde::Deserialize)]
270pub struct GalleryFolder {
271 #[serde(rename = "folderId")]
275 pub folder_id: i64,
276
277 pub name: String,
279
280 pub owner: u64,
282
283 #[serde(flatten)]
285 pub unknown: HashMap<String, serde_json::Value>,
286}
287
288#[derive(Debug, serde::Deserialize)]
290pub struct User {
291 #[serde(rename = "userId")]
293 pub user_id: u64,
294
295 pub username: String,
297
298 #[serde(flatten)]
300 pub unknown: HashMap<String, serde_json::Value>,
301}
302
303#[derive(Debug, serde::Deserialize)]
304pub struct Download {
305 pub filesize: u64,
307
308 pub height: u32,
310
311 pub width: u32,
313
314 #[serde(rename = "type")]
316 pub kind: String,
317
318 pub url: Url,
320
321 #[serde(flatten)]
323 pub unknown: HashMap<String, serde_json::Value>,
324}
325
326#[derive(Debug, serde::Deserialize)]
327pub struct AdditionalMedia {
328 pub media: Media,
330
331 #[serde(flatten)]
333 pub unknown: HashMap<String, serde_json::Value>,
334}
335
336#[derive(Debug, serde::Deserialize)]
338pub struct DuperBrowse {
339 #[serde(rename = "rootStream")]
341 pub root_stream: Option<RootStream>,
342
343 #[serde(flatten)]
345 pub unknown: HashMap<String, serde_json::Value>,
346}
347
348#[derive(Debug, serde::Deserialize)]
350pub struct RootStream {
351 #[serde(rename = "currentOpenItem")]
353 pub current_open_item: serde_json::Value,
354
355 #[serde(flatten)]
357 pub unknown: HashMap<String, serde_json::Value>,
358}
359
360#[derive(Debug, serde::Deserialize)]
362pub struct PublicSession {
363 #[serde(rename = "isLoggedIn")]
365 pub is_logged_in: bool,
366
367 #[serde(flatten)]
369 pub unknown: HashMap<String, serde_json::Value>,
370}
371
372#[derive(Debug, serde::Deserialize)]
374pub struct Streams {
375 #[serde(rename = "@@BROWSE_PAGE_STREAM")]
377 pub browse_page_stream: Option<BrowsePageStream>,
378
379 #[serde(flatten)]
383 pub streams: HashMap<String, Stream>,
384}
385
386#[derive(Debug, serde::Deserialize)]
388#[serde(tag = "streamType")]
389pub enum Stream {
390 #[serde(rename = "WITH_OFFSET")]
391 WithOffset(WithOffsetStream),
392
393 #[serde(untagged)]
394 Unknown(serde_json::Value),
395}
396
397impl Stream {
398 pub fn as_with_offset_stream(&self) -> Option<&WithOffsetStream> {
400 match self {
401 Self::WithOffset(stream) => Some(stream),
402 _ => None,
403 }
404 }
405}
406
407#[derive(Debug, serde::Deserialize)]
409pub struct WithOffsetStream {
410 pub items: Vec<u64>,
412
413 #[serde(rename = "itemsPerFetch")]
415 pub items_per_fetch: u32,
416
417 #[serde(rename = "hasMore")]
419 pub has_more: bool,
420
421 #[serde(rename = "hasLess")]
423 pub has_less: bool,
424
425 #[serde(flatten)]
427 pub unknown: HashMap<String, serde_json::Value>,
428}
429
430#[derive(Debug, serde::Deserialize)]
432pub struct BrowsePageStream {
433 pub cursor: String,
435
436 #[serde(rename = "hasLess")]
438 pub has_less: bool,
439
440 #[serde(rename = "hasMore")]
442 pub has_more: bool,
443
444 pub items: Vec<serde_json::Value>,
450
451 #[serde(rename = "itemsPerFetch")]
453 pub items_per_fetch: u64,
454
455 #[serde(rename = "streamParams")]
457 pub stream_params: StreamParams,
458
459 #[serde(rename = "streamType")]
461 pub stream_type: String,
462
463 #[serde(rename = "streamId")]
465 pub stream_id: String,
466
467 #[serde(rename = "fetchNextCallback")]
469 pub fetch_next_callback: String,
470
471 #[serde(flatten)]
473 pub unknown: HashMap<String, serde_json::Value>,
474}
475
476#[derive(Debug, serde::Deserialize)]
478pub struct StreamParams {
479 #[serde(rename = "requestParams")]
481 pub request_params: HashMap<String, String>,
482
483 #[serde(rename = "itemType")]
485 pub item_type: String,
486
487 #[serde(rename = "requestEndpoint")]
489 pub request_endpoint: String,
490
491 #[serde(rename = "initialOffset")]
493 pub initial_offset: u64,
494
495 #[serde(flatten)]
497 pub unknown: HashMap<String, serde_json::Value>,
498}
499
500#[derive(Debug, serde::Deserialize)]
502pub struct GallectionSection {
503 #[serde(rename = "currentPage")]
505 pub page: u64,
506
507 #[serde(rename = "selectedFolderId")]
509 pub selected_folder_id: i64,
510
511 #[serde(rename = "totalPages")]
513 pub total_pages: u64,
514
515 #[serde(flatten)]
517 pub unknown: HashMap<String, serde_json::Value>,
518}
519
#[cfg(test)]
mod test {
    use super::*;

    // Fixtures are embedded at compile time, relative to this source file.
    const SCRAPED_WEBPAGE: &str = include_str!("../../test_data/scraped_webpage.json");
    const LOGIN_WEBPAGE: &str = include_str!("../../test_data/login_webpage.json");

    /// The scraped-page fixture deserializes and exposes the expected current deviation id.
    #[test]
    fn parse_scraped_webpage() {
        let info: ScrapedWebPageInfo =
            serde_json::from_str(SCRAPED_WEBPAGE).expect("failed to parse scraped webpage info");

        let current_id = info
            .get_current_deviation_id()
            .expect("missing current deviation id");

        assert_eq!(current_id, 119577071);
    }

    /// The login-page fixture only needs to deserialize without error.
    #[test]
    fn parse_login_webpage() {
        let _info: ScrapedWebPageInfo =
            serde_json::from_str(LOGIN_WEBPAGE).expect("failed to parse scraped webpage info");
    }
}