summary refs log tree commit diff
path: root/src/geoparsers/v2ray
diff options
context:
space:
mode:
Diffstat (limited to 'src/geoparsers/v2ray')
-rw-r--r-- src/geoparsers/v2ray/mod.rs 2
-rw-r--r-- src/geoparsers/v2ray/parsing.rs 79
-rw-r--r-- src/geoparsers/v2ray/proto_src/geosite.proto 66
-rw-r--r-- src/geoparsers/v2ray/types.rs 121
4 files changed, 268 insertions, 0 deletions
diff --git a/src/geoparsers/v2ray/mod.rs b/src/geoparsers/v2ray/mod.rs
new file mode 100644
index 0000000..971be55
--- /dev/null
+++ b/src/geoparsers/v2ray/mod.rs
@@ -0,0 +1,2 @@
+pub mod parsing;
+pub mod types;
diff --git a/src/geoparsers/v2ray/parsing.rs b/src/geoparsers/v2ray/parsing.rs
new file mode 100644
index 0000000..0f897bf
--- /dev/null
+++ b/src/geoparsers/v2ray/parsing.rs
@@ -0,0 +1,79 @@
+use crate::geoparsers::v2ray::types::{Domain, GeoSite, GeoSiteList};
+use prost::bytes::Buf;
+use prost::Message;
+use std::fs;
+
+/// In-memory holder for a decoded GeoSite list, with simple query helpers.
+///
+/// Constructed by `GeoSiteService::new`, which reads a whole file and decodes it.
+pub struct GeoSiteService {
+ // All decoded `GeoSite` entries, in the order they were decoded; `lookup` scans them linearly.
+ index: GeoSiteList,
+}
+
+impl GeoSiteService {
+    /// Reads the GeoSite data file at `path` and decodes every entry it contains.
+    ///
+    /// The whole file is loaded into memory before decoding starts.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be read or if its contents fail to decode.
+    // TODO: consider memory-mapping the file instead; geosite files can exceed 70 MB.
+    pub fn new(path: &str) -> Result<Self, Box<dyn std::error::Error>> {
+        let raw = fs::read(path)?;
+        let index = decode_geosite_stream(&raw)?;
+        Ok(Self { index })
+    }
+
+    /// Returns the first entry that contains a domain whose `value` equals `value`.
+    ///
+    /// The comparison is plain string equality; the domain's matching type
+    /// (`r#type`) is not consulted. Returns `None` when no entry matches.
+    pub fn lookup(&self, value: &str) -> Option<&GeoSite> {
+        for site in &self.index.entry {
+            if site.domain.iter().any(|domain| domain.value == value) {
+                return Some(site);
+            }
+        }
+        None
+    }
+
+    /// Number of GeoSite entries held by this service.
+    pub fn len(&self) -> usize {
+        self.index.entry.len()
+    }
+
+    /// Whether this service holds no GeoSite entries at all.
+    pub fn is_empty(&self) -> bool {
+        self.index.entry.is_empty()
+    }
+}
+
+/// Decodes the raw contents of a `geosite.dat` file into a [`GeoSiteList`].
+///
+/// The input is walked as back-to-back records, each made of:
+/// 1. a one-byte tag that must be `0x0a` (field 1, wire type 2),
+/// 2. a varint giving the record length in bytes,
+/// 3. that many bytes holding one encoded [`GeoSite`].
+///
+/// Empty input yields an empty list.
+///
+/// # Errors
+///
+/// Returns an error when a record starts with any other tag, when the length varint is
+/// truncated or longer than ten bytes, when the declared length exceeds the bytes left in
+/// the input (e.g. a truncated file), or when a record fails to decode as a `GeoSite`.
+fn decode_geosite_stream(bytes: &[u8]) -> Result<GeoSiteList, Box<dyn std::error::Error>> {
+    let mut buf = bytes;
+    let mut entries = Vec::new();
+
+    while buf.has_remaining() {
+        // Record tag: 0x0a == (field 1 << 3) | wire type 2 (length-delimited).
+        let tag = buf.get_u8();
+        if tag != 0x0a {
+            return Err(format!("Unexpected tag: {:#04x}", tag).into());
+        }
+
+        // Record length as a base-128 varint. It is accumulated in a `u64` so that the
+        // shift amount (at most 63) is valid regardless of the target's pointer width.
+        let mut len = 0u64;
+        let mut shift = 0u32;
+        loop {
+            if !buf.has_remaining() {
+                return Err("Unexpected end of buffer while reading varint".into());
+            }
+            let b = buf.get_u8();
+            len |= u64::from(b & 0x7f) << shift;
+            if b & 0x80 == 0 {
+                break;
+            }
+            shift += 7;
+            // Shift advances in steps of 7, so this trips after the tenth byte.
+            if shift >= 64 {
+                return Err("Varint too long".into());
+            }
+        }
+
+        // Validate the declared length before slicing: a truncated or corrupt file must
+        // surface as an error rather than an out-of-bounds panic.
+        let remaining = buf.len();
+        if len > remaining as u64 {
+            return Err(format!(
+                "Entry length {} exceeds remaining {} bytes",
+                len, remaining
+            )
+            .into());
+        }
+
+        // Lossless cast: `len` was just checked to be no larger than a `usize` value.
+        let (entry_bytes, rest) = buf.split_at(len as usize);
+        entries.push(GeoSite::decode(entry_bytes)?);
+        buf = rest;
+    }
+
+    Ok(GeoSiteList { entry: entries })
+}
diff --git a/src/geoparsers/v2ray/proto_src/geosite.proto b/src/geoparsers/v2ray/proto_src/geosite.proto
new file mode 100644
index 0000000..e6c76dd
--- /dev/null
+++ b/src/geoparsers/v2ray/proto_src/geosite.proto
@@ -0,0 +1,66 @@
+syntax = "proto3";
+
+package types;
+
+// Domain for routing decision.
+message Domain {
+ // Type of domain value; selects how `value` is interpreted.
+ enum Type {
+ // The value is used as is.
+ Plain = 0;
+ // The value is used as a regular expression.
+ Regex = 1;
+ // The value is a root domain.
+ Domain = 2;
+ // The value is a full domain name.
+ // NOTE(review): presumably matched exactly, in contrast to the root-domain
+ // `Domain` type above; confirm against the matcher implementation.
+ Full = 3;
+ }
+
+ // Domain matching type.
+ Type type = 1;
+
+ // Domain value.
+ string value = 2;
+
+ // Attribute of the domain: a key plus at most one typed value.
+ message Attribute {
+ string key = 1;
+ // Value attached to the key, either a bool or a 64-bit integer.
+ oneof typed_value {
+ bool bool_value = 2;
+ int64 int_value = 3;
+ }
+ }
+
+ // Attributes of this domain. May be used for filtering.
+ repeated Attribute attribute = 3;
+}
+
+// IP for routing decision, in CIDR form.
+message CIDR {
+ // IP address, should be either 4 or 16 bytes.
+ // NOTE(review): presumably 4 bytes for IPv4 and 16 bytes for IPv6; confirm with producers.
+ bytes ip = 1;
+
+ // Number of leading ones in the network mask.
+ uint32 prefix = 2;
+}
+
+// A set of CIDR ranges labelled with a country code.
+message GeoIP {
+ // Label for this group of ranges.
+ string country_code = 1;
+ // The CIDR ranges that belong to this group.
+ repeated CIDR cidr = 2;
+}
+
+// Top-level container holding every GeoIP group.
+message GeoIPList {
+ repeated GeoIP entry = 1;
+}
+
+// A set of domains labelled with a country code.
+message GeoSite {
+ // Label for this group of domains.
+ string country_code = 1;
+ // The domains that belong to this group.
+ repeated Domain domain = 2;
+ // resource_hash instructs the simplified config converter to load domains from the geo file.
+ bytes resource_hash = 3;
+ // NOTE(review): purpose of `code` is not evident from this file; confirm with upstream schema.
+ string code = 4;
+}
+
+// Top-level container holding every GeoSite group.
+message GeoSiteList {
+ repeated GeoSite entry = 1;
+}
diff --git a/src/geoparsers/v2ray/types.rs b/src/geoparsers/v2ray/types.rs
new file mode 100644
index 0000000..d7c0436
--- /dev/null
+++ b/src/geoparsers/v2ray/types.rs
@@ -0,0 +1,121 @@
+// This file is @generated by prost-build.
+/// Domain for routing decision.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct Domain {
+ /// Domain matching type.
+ ///
+ /// Stored as the raw `i32` value; the named variants are in `domain::Type`.
+ #[prost(enumeration = "domain::Type", tag = "1")]
+ pub r#type: i32,
+ /// Domain value.
+ #[prost(string, tag = "2")]
+ pub value: ::prost::alloc::string::String,
+ /// Attributes of this domain. May be used for filtering.
+ #[prost(message, repeated, tag = "3")]
+ pub attribute: ::prost::alloc::vec::Vec<domain::Attribute>,
+}
+/// Nested message and enum types in `Domain`.
+pub mod domain {
+ /// Attribute of the domain.
+ #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
+ pub struct Attribute {
+ /// Attribute key.
+ #[prost(string, tag = "1")]
+ pub key: ::prost::alloc::string::String,
+ /// Optional typed value attached to the key; `None` when neither variant is set.
+ #[prost(oneof = "attribute::TypedValue", tags = "2, 3")]
+ pub typed_value: ::core::option::Option<attribute::TypedValue>,
+ }
+ /// Nested message and enum types in `Attribute`.
+ pub mod attribute {
+ /// The value carried by an `Attribute`: either a bool or a 64-bit integer.
+ #[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Oneof)]
+ pub enum TypedValue {
+ /// Boolean value (field tag 2).
+ #[prost(bool, tag = "2")]
+ BoolValue(bool),
+ /// Signed 64-bit integer value (field tag 3).
+ #[prost(int64, tag = "3")]
+ IntValue(i64),
+ }
+ }
+ /// Type of domain value.
+ #[derive(
+ Clone,
+ Copy,
+ Debug,
+ PartialEq,
+ Eq,
+ Hash,
+ PartialOrd,
+ Ord,
+ ::prost::Enumeration
+ )]
+ #[repr(i32)]
+ pub enum Type {
+ /// The value is used as is.
+ Plain = 0,
+ /// The value is used as a regular expression.
+ Regex = 1,
+ /// The value is a root domain.
+ Domain = 2,
+ /// The value is a domain.
+ Full = 3,
+ }
+ impl Type {
+ /// String value of the enum field names used in the ProtoBuf definition.
+ ///
+ /// The values are not transformed in any way and thus are considered stable
+ /// (if the ProtoBuf definition does not change) and safe for programmatic use.
+ pub fn as_str_name(&self) -> &'static str {
+ match self {
+ Self::Plain => "Plain",
+ Self::Regex => "Regex",
+ Self::Domain => "Domain",
+ Self::Full => "Full",
+ }
+ }
+ /// Creates an enum from field names used in the ProtoBuf definition.
+ ///
+ /// The match is case-sensitive; any other string yields `None`.
+ pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
+ match value {
+ "Plain" => Some(Self::Plain),
+ "Regex" => Some(Self::Regex),
+ "Domain" => Some(Self::Domain),
+ "Full" => Some(Self::Full),
+ _ => None,
+ }
+ }
+ }
+}
+/// IP for routing decision, in CIDR form.
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
+pub struct Cidr {
+ /// IP address, should be either 4 or 16 bytes.
+ ///
+ /// Held as raw bytes; the length is not validated by this type.
+ #[prost(bytes = "vec", tag = "1")]
+ pub ip: ::prost::alloc::vec::Vec<u8>,
+ /// Number of leading ones in the network mask.
+ #[prost(uint32, tag = "2")]
+ pub prefix: u32,
+}
+/// A set of CIDR ranges labelled with a country code.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct GeoIp {
+ /// Label for this group of ranges.
+ #[prost(string, tag = "1")]
+ pub country_code: ::prost::alloc::string::String,
+ /// The CIDR ranges that belong to this group.
+ #[prost(message, repeated, tag = "2")]
+ pub cidr: ::prost::alloc::vec::Vec<Cidr>,
+}
+/// Top-level container holding every `GeoIp` group.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct GeoIpList {
+ /// All groups, as a repeated message field with tag 1.
+ #[prost(message, repeated, tag = "1")]
+ pub entry: ::prost::alloc::vec::Vec<GeoIp>,
+}
+/// A set of domains labelled with a country code.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct GeoSite {
+ /// Label for this group of domains.
+ #[prost(string, tag = "1")]
+ pub country_code: ::prost::alloc::string::String,
+ /// The domains that belong to this group.
+ #[prost(message, repeated, tag = "2")]
+ pub domain: ::prost::alloc::vec::Vec<Domain>,
+ /// resource_hash instructs the simplified config converter to load domains from the geo file.
+ #[prost(bytes = "vec", tag = "3")]
+ pub resource_hash: ::prost::alloc::vec::Vec<u8>,
+ /// NOTE(review): purpose of `code` is not evident from this file; confirm with upstream schema.
+ #[prost(string, tag = "4")]
+ pub code: ::prost::alloc::string::String,
+}
+/// Top-level container holding every `GeoSite` group.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct GeoSiteList {
+ /// All groups, as a repeated message field with tag 1.
+ #[prost(message, repeated, tag = "1")]
+ pub entry: ::prost::alloc::vec::Vec<GeoSite>,
+}