diff options
| author | namilsk <namilsk@namilsk.tech> | 2026-03-18 21:21:21 +0300 |
|---|---|---|
| committer | namilsk <namilsk@namilsk.tech> | 2026-03-18 21:21:21 +0300 |
| commit | 8887a775f5c46551f8d9ea0f2197d129008eabf1 (patch) | |
| tree | c37f9808af3326d8d0adf873c756bae0ebe6257f /src/geoparsers | |
| parent | da8e70f2e3c841796c122ca90617d74cb044b763 (diff) | |
Wrote the geosite protobuf parser and tests for it
Diffstat (limited to 'src/geoparsers')
| -rw-r--r-- | src/geoparsers/mod.rs | 1 | ||||
| -rw-r--r-- | src/geoparsers/v2ray/mod.rs | 2 | ||||
| -rw-r--r-- | src/geoparsers/v2ray/parsing.rs | 79 | ||||
| -rw-r--r-- | src/geoparsers/v2ray/proto_src/geosite.proto | 66 | ||||
| -rw-r--r-- | src/geoparsers/v2ray/types.rs | 121 |
5 files changed, 269 insertions, 0 deletions
diff --git a/src/geoparsers/mod.rs b/src/geoparsers/mod.rs index 43af0f3..7828b9b 100644 --- a/src/geoparsers/mod.rs +++ b/src/geoparsers/mod.rs @@ -1,2 +1,3 @@ pub mod geoip2; pub mod toml; +pub mod v2ray;
\ No newline at end of file diff --git a/src/geoparsers/v2ray/mod.rs b/src/geoparsers/v2ray/mod.rs new file mode 100644 index 0000000..971be55 --- /dev/null +++ b/src/geoparsers/v2ray/mod.rs @@ -0,0 +1,2 @@ +pub mod parsing; +pub mod types; diff --git a/src/geoparsers/v2ray/parsing.rs b/src/geoparsers/v2ray/parsing.rs new file mode 100644 index 0000000..0f897bf --- /dev/null +++ b/src/geoparsers/v2ray/parsing.rs @@ -0,0 +1,79 @@ +use crate::geoparsers::v2ray::types::{Domain, GeoSite, GeoSiteList}; +use prost::bytes::Buf; +use prost::Message; +use std::fs; + +pub struct GeoSiteService { + index: GeoSiteList, +} + +impl GeoSiteService { + // TODO: Make more smart memory mapping; geosite files can be > 70MB + pub fn new(path: &str) -> Result<Self, Box<dyn std::error::Error>> { + let bytes = fs::read(path)?; + let geosite_list = decode_geosite_stream(&bytes)?; + + Ok(Self { + index: geosite_list, + }) + } + + // Idk but i think it can work + pub fn lookup(&self, value: &str) -> Option<&GeoSite> { + self.index + .entry + .iter() + .find(|site| site.domain.iter().any(|d| d.value == value)) + } + + /// Returns the number of GeoSite entries in the list + pub fn len(&self) -> usize { + self.index.entry.len() + } + + /// Returns true if the GeoSite list is empty + pub fn is_empty(&self) -> bool { + self.index.entry.is_empty() + } +} + +/// Decode a stream of length-delimited GeoSite messages +/// `geosite.dat` ts is not one protobuf-message, stream of length-delimited messages +/// so we need ts helper +fn decode_geosite_stream(bytes: &[u8]) -> Result<GeoSiteList, Box<dyn std::error::Error>> { + let mut buf = bytes; + let mut entries = Vec::new(); + + while buf.has_remaining() { + // Read tag (0x0a field 1, wire type 2) + let tag = buf.get_u8(); + if tag != 0x0a { + return Err(format!("Unexpected tag: {:#04x}", tag).into()); + } + // varint + let mut len = 0usize; + let mut shift = 0; + loop { + if !buf.has_remaining() { + return Err("Unexpected end of buffer while reading 
varint".into()); + } + let b = buf.get_u8(); + len |= ((b & 0x7f) as usize) << shift; + if b & 0x80 == 0 { + break; + } + shift += 7; + if shift >= 70 { + return Err("Varint too long".into()); + } + } + + let entry_bytes = &buf[..len]; + let site = GeoSite::decode(entry_bytes)?; + entries.push(site); + + buf.advance(len); + } + + Ok(GeoSiteList { entry: entries }) +} diff --git a/src/geoparsers/v2ray/proto_src/geosite.proto b/src/geoparsers/v2ray/proto_src/geosite.proto new file mode 100644 index 0000000..e6c76dd --- /dev/null +++ b/src/geoparsers/v2ray/proto_src/geosite.proto @@ -0,0 +1,66 @@ +syntax = "proto3"; + +package types; + +// Domain for routing decision. +message Domain { + // Type of domain value. + enum Type { + // The value is used as is. + Plain = 0; + // The value is used as a regular expression. + Regex = 1; + // The value is a root domain. + Domain = 2; + // The value is a domain. + Full = 3; + } + + // Domain matching type. + Type type = 1; + + // Domain value. + string value = 2; + + // Attribute of the domain. + message Attribute { + string key = 1; + oneof typed_value { + bool bool_value = 2; + int64 int_value = 3; + } + } + + // Attributes of this domain. May be used for filtering. + repeated Attribute attribute = 3; +} + +// IP for routing decision, in CIDR form. +message CIDR { + // IP address, should be either 4 or 16 bytes. + bytes ip = 1; + + // Number of leading ones in the network mask. + uint32 prefix = 2; +} + +message GeoIP { + string country_code = 1; + repeated CIDR cidr = 2; +} + +message GeoIPList { + repeated GeoIP entry = 1; +} + +message GeoSite { + string country_code = 1; + repeated Domain domain = 2; + // resource_hash instruct simplified config converter to load domain from geo file. 
+ bytes resource_hash = 3; + string code = 4; +} + +message GeoSiteList { + repeated GeoSite entry = 1; +} diff --git a/src/geoparsers/v2ray/types.rs b/src/geoparsers/v2ray/types.rs new file mode 100644 index 0000000..d7c0436 --- /dev/null +++ b/src/geoparsers/v2ray/types.rs @@ -0,0 +1,121 @@ +// This file is @generated by prost-build. +/// Domain for routing decision. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Domain { + /// Domain matching type. + #[prost(enumeration = "domain::Type", tag = "1")] + pub r#type: i32, + /// Domain value. + #[prost(string, tag = "2")] + pub value: ::prost::alloc::string::String, + /// Attributes of this domain. May be used for filtering. + #[prost(message, repeated, tag = "3")] + pub attribute: ::prost::alloc::vec::Vec<domain::Attribute>, +} +/// Nested message and enum types in `Domain`. +pub mod domain { + /// Attribute of the domain. + #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] + pub struct Attribute { + #[prost(string, tag = "1")] + pub key: ::prost::alloc::string::String, + #[prost(oneof = "attribute::TypedValue", tags = "2, 3")] + pub typed_value: ::core::option::Option<attribute::TypedValue>, + } + /// Nested message and enum types in `Attribute`. + pub mod attribute { + #[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Oneof)] + pub enum TypedValue { + #[prost(bool, tag = "2")] + BoolValue(bool), + #[prost(int64, tag = "3")] + IntValue(i64), + } + } + /// Type of domain value. + #[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, + ::prost::Enumeration + )] + #[repr(i32)] + pub enum Type { + /// The value is used as is. + Plain = 0, + /// The value is used as a regular expression. + Regex = 1, + /// The value is a root domain. + Domain = 2, + /// The value is a domain. + Full = 3, + } + impl Type { + /// String value of the enum field names used in the ProtoBuf definition. 
+ /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Plain => "Plain", + Self::Regex => "Regex", + Self::Domain => "Domain", + Self::Full => "Full", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option<Self> { + match value { + "Plain" => Some(Self::Plain), + "Regex" => Some(Self::Regex), + "Domain" => Some(Self::Domain), + "Full" => Some(Self::Full), + _ => None, + } + } + } +} +/// IP for routing decision, in CIDR form. +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct Cidr { + /// IP address, should be either 4 or 16 bytes. + #[prost(bytes = "vec", tag = "1")] + pub ip: ::prost::alloc::vec::Vec<u8>, + /// Number of leading ones in the network mask. + #[prost(uint32, tag = "2")] + pub prefix: u32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GeoIp { + #[prost(string, tag = "1")] + pub country_code: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "2")] + pub cidr: ::prost::alloc::vec::Vec<Cidr>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GeoIpList { + #[prost(message, repeated, tag = "1")] + pub entry: ::prost::alloc::vec::Vec<GeoIp>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GeoSite { + #[prost(string, tag = "1")] + pub country_code: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "2")] + pub domain: ::prost::alloc::vec::Vec<Domain>, + /// resource_hash instruct simplified config converter to load domain from geo file. 
+ #[prost(bytes = "vec", tag = "3")] + pub resource_hash: ::prost::alloc::vec::Vec<u8>, + #[prost(string, tag = "4")] + pub code: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GeoSiteList { + #[prost(message, repeated, tag = "1")] + pub entry: ::prost::alloc::vec::Vec<GeoSite>, +} |
