azalea_core/
checksum.rs

1use std::{cmp::Ordering, fmt, hash::Hasher};
2
3use azalea_buf::AzBuf;
4use azalea_registry::identifier::Identifier;
5use crc32c::Crc32cHasher;
6use serde::{Serialize, ser};
7use thiserror::Error;
8use tracing::error;
9
10use crate::registry_holder::RegistryHolder;
11
/// A 32-bit checksum value.
///
/// Within this module it is produced by truncating the output of a
/// [`Crc32cHasher`] to `u32`.
#[derive(AzBuf, Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
pub struct Checksum(pub u32);
14
/// A serde serializer that feeds the serialized value into a CRC32C hasher
/// instead of producing output bytes.
///
/// Every `serialize_*` method yields `()`; the result is read back from the
/// hasher afterwards (see [`get_checksum`]).
pub struct ChecksumSerializer<'a, 'r> {
    /// Hasher that receives the tag and payload bytes of the value.
    hasher: &'a mut Crc32cHasher,
    /// Used to turn the protocol id of `minecraft:`-prefixed newtype variants
    /// into an identifier.
    registries: &'r RegistryHolder,
}
19impl<'a, 'r> ChecksumSerializer<'a, 'r> {
20    pub fn checksum(&mut self) -> Checksum {
21        Checksum(self.hasher.finish() as u32)
22    }
23}
24
25impl<'a, 'r> ser::Serializer for ChecksumSerializer<'a, 'r> {
26    type Ok = ();
27
28    // The error type when some error occurs during serialization.
29    type Error = ChecksumError;
30
31    type SerializeSeq = ChecksumListSerializer<'a, 'r>;
32    type SerializeTuple = ChecksumListSerializer<'a, 'r>;
33    type SerializeTupleStruct = ChecksumListSerializer<'a, 'r>;
34    type SerializeTupleVariant = ChecksumMapSerializer<'a, 'r>;
35    type SerializeMap = ChecksumMapSerializer<'a, 'r>;
36    type SerializeStruct = ChecksumMapSerializer<'a, 'r>;
37    type SerializeStructVariant = ChecksumMapSerializer<'a, 'r>;
38
39    fn serialize_bool(self, v: bool) -> Result<()> {
40        assert!(self.hasher.finish() == 0);
41        self.hasher.write_u8(13);
42        self.hasher.write(&[v as u8]);
43        Ok(())
44    }
45
46    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap> {
47        assert!(self.hasher.finish() == 0);
48        Ok(ChecksumMapSerializer {
49            hasher: self.hasher,
50            registries: self.registries,
51            entries: Vec::new(),
52        })
53    }
54    fn serialize_struct(self, _name: &'static str, len: usize) -> Result<Self::SerializeStruct> {
55        assert!(self.hasher.finish() == 0);
56        self.serialize_map(Some(len))
57    }
58
59    fn serialize_i8(self, v: i8) -> Result<()> {
60        assert!(self.hasher.finish() == 0);
61        self.hasher.write_u8(6);
62        self.hasher.write(&v.to_le_bytes());
63        Ok(())
64    }
65
66    fn serialize_i16(self, v: i16) -> Result<()> {
67        assert!(self.hasher.finish() == 0);
68        self.hasher.write_u8(7);
69        self.hasher.write(&v.to_le_bytes());
70        Ok(())
71    }
72
73    fn serialize_i32(self, v: i32) -> Result<()> {
74        assert!(self.hasher.finish() == 0);
75        self.hasher.write_u8(8);
76        self.hasher.write(&v.to_le_bytes());
77        Ok(())
78    }
79
80    fn serialize_i64(self, v: i64) -> Result<()> {
81        assert!(self.hasher.finish() == 0);
82        self.hasher.write_u8(9);
83        self.hasher.write(&v.to_le_bytes());
84        Ok(())
85    }
86
87    fn serialize_u8(self, v: u8) -> Result<()> {
88        assert!(self.hasher.finish() == 0);
89        self.serialize_i8(v as i8)
90    }
91
92    fn serialize_u16(self, v: u16) -> Result<()> {
93        assert!(self.hasher.finish() == 0);
94        self.serialize_i16(v as i16)
95    }
96
97    fn serialize_u32(self, v: u32) -> Result<()> {
98        assert!(self.hasher.finish() == 0);
99        self.serialize_i32(v as i32)
100    }
101
102    fn serialize_u64(self, v: u64) -> Result<()> {
103        assert!(self.hasher.finish() == 0);
104        self.serialize_i64(v as i64)
105    }
106
107    fn serialize_f32(self, v: f32) -> Result<()> {
108        assert!(self.hasher.finish() == 0);
109        self.hasher.write_u8(10);
110        self.hasher.write(&v.to_le_bytes());
111        Ok(())
112    }
113
114    fn serialize_f64(self, v: f64) -> Result<()> {
115        assert!(self.hasher.finish() == 0);
116        self.hasher.write_u8(11);
117        self.hasher.write(&v.to_le_bytes());
118        Ok(())
119    }
120
121    fn serialize_char(self, v: char) -> Result<()> {
122        assert!(self.hasher.finish() == 0);
123        self.serialize_u32(v as u32)
124    }
125
126    fn serialize_str(self, v: &str) -> Result<()> {
127        assert!(self.hasher.finish() == 0);
128        self.hasher.write_u8(12);
129        let utf16 = v.encode_utf16().collect::<Vec<_>>();
130        self.hasher.write(&(utf16.len() as u32).to_le_bytes());
131        for c in utf16 {
132            self.hasher.write(&c.to_le_bytes());
133        }
134        Ok(())
135    }
136
137    fn serialize_bytes(self, v: &[u8]) -> Result<()> {
138        assert!(self.hasher.finish() == 0);
139        self.hasher.write_u8(14);
140        self.hasher.write(v);
141        self.hasher.write_u8(15);
142        Ok(())
143    }
144
145    fn serialize_none(self) -> Result<()> {
146        assert!(self.hasher.finish() == 0);
147        self.hasher.write_u8(1);
148        Ok(())
149    }
150
151    fn serialize_some<T>(self, value: &T) -> Result<()>
152    where
153        T: ?Sized + Serialize,
154    {
155        // check if t
156
157        value.serialize(self)?;
158        Ok(())
159    }
160
161    fn serialize_unit(self) -> Result<()> {
162        assert!(self.hasher.finish() == 0);
163        Ok(())
164    }
165
166    fn serialize_unit_struct(self, _name: &'static str) -> Result<()> {
167        assert!(self.hasher.finish() == 0);
168        update_hasher_for_map(self.hasher, &[]);
169        Ok(())
170    }
171
172    fn serialize_unit_variant(
173        self,
174        _name: &'static str,
175        _variant_index: u32,
176        variant: &'static str,
177    ) -> Result<()> {
178        self.serialize_str(variant)
179    }
180
181    fn serialize_newtype_struct<T>(self, _name: &'static str, value: &T) -> Result<()>
182    where
183        T: ?Sized + Serialize,
184    {
185        value.serialize(self)
186    }
187
188    fn serialize_newtype_variant<T>(
189        self,
190        name: &'static str,
191        variant_index: u32,
192        _variant: &'static str,
193        value: &T,
194    ) -> Result<()>
195    where
196        T: ?Sized + Serialize,
197    {
198        // we can't have custom handlers with serde's traits, so we use this silly hack
199        // to make serializing data-driven registries work
200        if name.starts_with("minecraft:") {
201            let value = self
202                .registries
203                .protocol_id_to_identifier(Identifier::from(name), variant_index)
204                .map(|v| v.to_string())
205                .unwrap_or_default();
206            self.serialize_str(&value)?;
207            return Ok(());
208        }
209
210        value.serialize(ChecksumSerializer {
211            hasher: self.hasher,
212            registries: self.registries,
213        })
214    }
215
216    fn serialize_seq(self, len: Option<usize>) -> Result<Self::SerializeSeq> {
217        assert!(self.hasher.finish() == 0);
218        Ok(ChecksumListSerializer {
219            hasher: self.hasher,
220            registries: self.registries,
221            values: Vec::with_capacity(len.unwrap_or_default()),
222            list_kind: ListKind::Normal,
223        })
224    }
225
226    fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple> {
227        assert!(self.hasher.finish() == 0);
228        Ok(ChecksumListSerializer {
229            hasher: self.hasher,
230            registries: self.registries,
231            values: Vec::with_capacity(len),
232            list_kind: ListKind::Normal,
233        })
234    }
235
236    fn serialize_tuple_struct(
237        self,
238        name: &'static str,
239        len: usize,
240    ) -> Result<Self::SerializeTupleStruct> {
241        assert!(self.hasher.finish() == 0);
242        let list_kind = if name == "azalea:int_array" {
243            self.hasher.write_u8(16);
244            ListKind::Int
245        } else if name == "azalea:long_array" {
246            self.hasher.write_u8(18);
247            ListKind::Long
248        } else {
249            ListKind::Normal
250        };
251        Ok(ChecksumListSerializer {
252            hasher: self.hasher,
253            registries: self.registries,
254            values: Vec::with_capacity(len),
255            list_kind,
256        })
257    }
258
259    fn serialize_tuple_variant(
260        self,
261        _name: &'static str,
262        _variant_index: u32,
263        _variant: &'static str,
264        len: usize,
265    ) -> Result<Self::SerializeTupleVariant> {
266        assert!(self.hasher.finish() == 0);
267        Ok(ChecksumMapSerializer {
268            hasher: self.hasher,
269            registries: self.registries,
270            entries: Vec::with_capacity(len),
271        })
272    }
273
274    fn serialize_struct_variant(
275        self,
276        _name: &'static str,
277        _variant_index: u32,
278        _variant: &'static str,
279        len: usize,
280    ) -> Result<Self::SerializeStructVariant> {
281        Ok(ChecksumMapSerializer {
282            hasher: self.hasher,
283            registries: self.registries,
284            entries: Vec::with_capacity(len),
285        })
286    }
287}
288
/// Collects the elements of a sequence, tuple or tuple struct and writes the
/// result into the hasher.
///
/// For [`ListKind::Normal`] every element is checksummed on its own and the
/// checksums are written when the list ends. For the other kinds the elements
/// are written into the hasher directly as they arrive.
pub struct ChecksumListSerializer<'a, 'r> {
    /// Hasher that receives the finished list.
    hasher: &'a mut Crc32cHasher,
    /// Passed on to [`get_checksum`] when checksumming the elements.
    registries: &'r RegistryHolder,
    /// Checksums of the elements seen so far (only filled for
    /// [`ListKind::Normal`]).
    values: Vec<Checksum>,
    /// If you set this to not be the default, you should also update the hasher
    /// before creating the list serializer.
    list_kind: ListKind,
}
297impl<'a, 'r> ser::SerializeSeq for ChecksumListSerializer<'a, 'r> {
298    type Ok = ();
299    type Error = ChecksumError;
300
301    fn serialize_element<T>(&mut self, value: &T) -> Result<()>
302    where
303        T: ?Sized + Serialize,
304    {
305        if self.list_kind == ListKind::Normal {
306            // elements are hashed individually
307            self.values.push(get_checksum(value, self.registries)?);
308        } else {
309            value.serialize(IntOrLongArrayChecksumSerializer {
310                hasher: self.hasher,
311            })?;
312        }
313
314        Ok(())
315    }
316
317    fn end(self) -> Result<()> {
318        match self.list_kind {
319            ListKind::Normal => {
320                assert!(self.hasher.finish() == 0);
321                update_hasher_for_list(self.hasher, &self.values);
322            }
323            ListKind::Int => {
324                self.hasher.write_u8(17);
325            }
326            ListKind::Long => {
327                self.hasher.write_u8(19);
328            }
329        }
330
331        Ok(())
332    }
333}
/// Minecraft sometimes serializes u8/i32/i64 lists differently, so we have to
/// keep track of that when serializing the arrays.
///
/// Byte arrays aren't included here as they're handled with `serialize_bytes`.
#[derive(Default, Eq, PartialEq)]
enum ListKind {
    /// Every element is checksummed on its own and the checksums are hashed
    /// when the list ends.
    #[default]
    Normal,
    /// Selected for tuple structs named `azalea:int_array`; the elements are
    /// written between the tags 16 and 17.
    Int,
    /// Selected for tuple structs named `azalea:long_array`; the elements are
    /// written between the tags 18 and 19.
    Long,
}
345
346impl<'a, 'r> ser::SerializeTuple for ChecksumListSerializer<'a, 'r> {
347    type Ok = ();
348    type Error = ChecksumError;
349
350    fn serialize_element<T>(&mut self, value: &T) -> Result<()>
351    where
352        T: ?Sized + Serialize,
353    {
354        ser::SerializeSeq::serialize_element(self, value)
355    }
356
357    fn end(self) -> Result<()> {
358        ser::SerializeSeq::end(self)
359    }
360}
361impl<'a, 'r> ser::SerializeTupleStruct for ChecksumListSerializer<'a, 'r> {
362    type Ok = ();
363    type Error = ChecksumError;
364
365    fn serialize_field<T>(&mut self, value: &T) -> Result<()>
366    where
367        T: ?Sized + Serialize,
368    {
369        ser::SerializeSeq::serialize_element(self, value)
370    }
371
372    fn end(self) -> Result<()> {
373        ser::SerializeSeq::end(self)
374    }
375}
376
/// Collects the entries of a map, struct or enum variant as pairs of
/// checksums and hashes them once the value is complete.
pub struct ChecksumMapSerializer<'a, 'r> {
    // this is only written to at the end
    hasher: &'a mut Crc32cHasher,
    // passed on to `get_checksum` when checksumming keys and values
    registries: &'r RegistryHolder,
    // we have to keep track of the elements like this because they're sorted at the end
    entries: Vec<(Checksum, Checksum)>,
}
384impl<'a, 'r> ser::SerializeMap for ChecksumMapSerializer<'a, 'r> {
385    type Ok = ();
386    type Error = ChecksumError;
387
388    fn serialize_key<T>(&mut self, key: &T) -> Result<()>
389    where
390        T: ?Sized + Serialize,
391    {
392        // this 0 is a placeholder
393        self.entries
394            .push((get_checksum(key, self.registries)?, Checksum(0)));
395        Ok(())
396    }
397
398    // It doesn't make a difference whether the colon is printed at the end of
399    // `serialize_key` or at the beginning of `serialize_value`. In this case
400    // the code is a bit simpler having it here.
401    fn serialize_value<T>(&mut self, value: &T) -> Result<()>
402    where
403        T: ?Sized + Serialize,
404    {
405        // placeholder gets replaced here
406        self.entries
407            .last_mut()
408            .expect("entry should've already been added")
409            .1 = get_checksum(value, self.registries)?;
410        Ok(())
411    }
412
413    fn end(self) -> Result<()> {
414        assert!(self.hasher.finish() == 0);
415        update_hasher_for_map(self.hasher, &self.entries);
416        Ok(())
417    }
418}
419impl<'a, 'r> ser::SerializeTupleVariant for ChecksumMapSerializer<'a, 'r> {
420    type Ok = ();
421    type Error = ChecksumError;
422
423    fn serialize_field<T>(&mut self, _value: &T) -> Result<()>
424    where
425        T: ?Sized + Serialize,
426    {
427        // TODO
428        error!("tuple variants are not supported when serializing checksums");
429        Ok(())
430    }
431
432    fn end(self) -> Result<()> {
433        assert!(self.hasher.finish() == 0);
434        Ok(())
435    }
436}
437impl<'a, 'r> ser::SerializeStruct for ChecksumMapSerializer<'a, 'r> {
438    type Ok = ();
439    type Error = ChecksumError;
440
441    fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()>
442    where
443        T: ?Sized + Serialize,
444    {
445        self.entries.push((
446            get_checksum(key, self.registries)?,
447            get_checksum(value, self.registries)?,
448        ));
449        Ok(())
450    }
451
452    fn end(self) -> Result<()> {
453        assert!(self.hasher.finish() == 0);
454        update_hasher_for_map(self.hasher, &self.entries);
455        Ok(())
456    }
457}
458impl<'a, 'r> ser::SerializeStructVariant for ChecksumMapSerializer<'a, 'r> {
459    type Ok = ();
460    type Error = ChecksumError;
461
462    fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()>
463    where
464        T: ?Sized + Serialize,
465    {
466        self.entries.push((
467            get_checksum(key, self.registries)?,
468            get_checksum(value, self.registries)?,
469        ));
470        Ok(())
471    }
472
473    fn end(self) -> Result<()> {
474        assert!(self.hasher.finish() == 0);
475        update_hasher_for_map(self.hasher, &self.entries);
476        Ok(())
477    }
478}
479
/// A hasher that can only serialize i32 and i64.
///
/// It is used for the elements of int and long arrays: the little-endian
/// bytes of each element are written to the hasher without a type tag.
/// `u32` and `u64` are accepted too and are cast to their signed
/// counterparts.
struct IntOrLongArrayChecksumSerializer<'a> {
    /// Hasher shared with the surrounding list serializer.
    hasher: &'a mut Crc32cHasher,
}
484impl<'a> ser::Serializer for IntOrLongArrayChecksumSerializer<'a> {
485    type Ok = ();
486    type Error = ChecksumError;
487    // unused
488    type SerializeSeq = ChecksumListSerializer<'a, 'a>;
489    type SerializeTuple = ChecksumListSerializer<'a, 'a>;
490    type SerializeTupleStruct = ChecksumListSerializer<'a, 'a>;
491    type SerializeTupleVariant = ChecksumMapSerializer<'a, 'a>;
492    type SerializeMap = ChecksumMapSerializer<'a, 'a>;
493    type SerializeStruct = ChecksumMapSerializer<'a, 'a>;
494    type SerializeStructVariant = ChecksumMapSerializer<'a, 'a>;
495
496    fn serialize_bool(self, _v: bool) -> Result<()> {
497        unimplemented!()
498    }
499    fn serialize_i8(self, _v: i8) -> Result<()> {
500        unimplemented!()
501    }
502    fn serialize_i16(self, _v: i16) -> Result<()> {
503        unimplemented!()
504    }
505    fn serialize_i32(self, v: i32) -> Result<()> {
506        self.hasher.write(&v.to_le_bytes());
507        Ok(())
508    }
509    fn serialize_i64(self, v: i64) -> Result<()> {
510        self.hasher.write(&v.to_le_bytes());
511        Ok(())
512    }
513    fn serialize_u8(self, _v: u8) -> Result<()> {
514        unimplemented!()
515    }
516    fn serialize_u16(self, _v: u16) -> Result<()> {
517        unimplemented!()
518    }
519    fn serialize_u32(self, v: u32) -> Result<()> {
520        self.serialize_i32(v as i32)
521    }
522    fn serialize_u64(self, v: u64) -> Result<()> {
523        self.serialize_i64(v as i64)
524    }
525    fn serialize_f32(self, _v: f32) -> Result<()> {
526        unimplemented!()
527    }
528    fn serialize_f64(self, _v: f64) -> Result<()> {
529        unimplemented!()
530    }
531    fn serialize_char(self, _v: char) -> Result<()> {
532        unimplemented!()
533    }
534    fn serialize_str(self, _v: &str) -> Result<()> {
535        unimplemented!()
536    }
537    fn serialize_bytes(self, _v: &[u8]) -> Result<()> {
538        unimplemented!()
539    }
540    fn serialize_none(self) -> Result<()> {
541        unimplemented!()
542    }
543    fn serialize_some<T>(self, _v: &T) -> Result<()>
544    where
545        T: ?Sized + Serialize,
546    {
547        unimplemented!()
548    }
549    fn serialize_unit(self) -> Result<()> {
550        unimplemented!()
551    }
552    fn serialize_unit_struct(self, _name: &'static str) -> Result<()> {
553        unimplemented!()
554    }
555    fn serialize_unit_variant(
556        self,
557        _name: &'static str,
558        _variant_index: u32,
559        _variant: &'static str,
560    ) -> Result<()> {
561        unimplemented!()
562    }
563    fn serialize_newtype_struct<T>(self, _name: &'static str, _value: &T) -> Result<()>
564    where
565        T: ?Sized + Serialize,
566    {
567        unimplemented!()
568    }
569    fn serialize_newtype_variant<T>(
570        self,
571        _name: &'static str,
572        _variant_index: u32,
573        _variant: &'static str,
574        _value: &T,
575    ) -> Result<()>
576    where
577        T: ?Sized + Serialize,
578    {
579        unimplemented!()
580    }
581    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq> {
582        unimplemented!()
583    }
584    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple> {
585        unimplemented!()
586    }
587    fn serialize_tuple_struct(
588        self,
589        _name: &'static str,
590        _len: usize,
591    ) -> Result<Self::SerializeTupleStruct> {
592        unimplemented!()
593    }
594    fn serialize_tuple_variant(
595        self,
596        _name: &'static str,
597        _variant_index: u32,
598        _variant: &'static str,
599        _len: usize,
600    ) -> Result<Self::SerializeTupleVariant> {
601        unimplemented!()
602    }
603    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap> {
604        unimplemented!()
605    }
606    fn serialize_struct(self, _name: &'static str, _len: usize) -> Result<Self::SerializeStruct> {
607        unimplemented!()
608    }
609    fn serialize_struct_variant(
610        self,
611        _name: &'static str,
612        _variant_index: u32,
613        _variant: &'static str,
614        _len: usize,
615    ) -> Result<Self::SerializeStructVariant> {
616        unimplemented!()
617    }
618}
619
620#[derive(Debug, Error)]
621#[error("Checksum serialization error")]
622pub struct ChecksumError;
623impl ser::Error for ChecksumError {
624    fn custom<T>(msg: T) -> Self
625    where
626        T: fmt::Display,
627    {
628        eprintln!("Serialization error: {msg}");
629        ChecksumError
630    }
631}
/// Shorthand for results whose error type is [`ChecksumError`].
type Result<T> = std::result::Result<T, ChecksumError>;
633
634pub fn get_checksum<T: Serialize + ?Sized>(
635    value: &T,
636    registries: &RegistryHolder,
637) -> Result<Checksum> {
638    let mut hasher = Crc32cHasher::default();
639    value.serialize(ChecksumSerializer {
640        hasher: &mut hasher,
641        registries,
642    })?;
643    Ok(Checksum(hasher.finish() as u32))
644}
645
646fn update_hasher_for_list(h: &mut Crc32cHasher, values: &[Checksum]) {
647    h.write_u8(4);
648    for v in values {
649        h.write(&v.0.to_le_bytes());
650    }
651    h.write_u8(5);
652}
653fn update_hasher_for_map(h: &mut Crc32cHasher, entries: &[(Checksum, Checksum)]) {
654    h.write_u8(2);
655    let mut entries = entries.to_vec();
656    entries.sort_by(|a, b| match a.0.cmp(&b.0) {
657        Ordering::Equal => a.1.cmp(&b.1),
658        other => other,
659    });
660    for (k, v) in entries {
661        h.write(&k.0.to_le_bytes());
662        h.write(&v.0.to_le_bytes());
663    }
664    h.write_u8(3);
665}