##! An interface for driving the analysis of files, possibly independent of ##! any network protocol over which they're transported. @load base/bif/file_analysis.bif @load base/frameworks/analyzer @load base/frameworks/logging @load base/utils/site module Files; export { redef enum Log::ID += { ## Logging stream for file analysis. LOG }; ## A structure which parameterizes a type of file analysis. type AnalyzerArgs: record { ## An event which will be generated for all new file contents, ## chunk-wise. Used when *tag* (in the ## :bro:see:`Files::add_analyzer` function) is ## :bro:see:`Files::ANALYZER_DATA_EVENT`. chunk_event: event(f: fa_file, data: string, off: count) &optional; ## An event which will be generated for all new file contents, ## stream-wise. Used when *tag* is ## :bro:see:`Files::ANALYZER_DATA_EVENT`. stream_event: event(f: fa_file, data: string) &optional; } &redef; ## Contains all metadata related to the analysis of a given file. ## For the most part, fields here are derived from ones of the same name ## in :bro:see:`fa_file`. type Info: record { ## The time when the file was first seen. ts: time &log; ## An identifier associated with a single file. fuid: string &log; ## If this file was transferred over a network ## connection this should show the host or hosts that ## the data sourced from. tx_hosts: set[addr] &default=addr_set() &log; ## If this file was transferred over a network ## connection this should show the host or hosts that ## the data traveled to. rx_hosts: set[addr] &default=addr_set() &log; ## Connection UIDs over which the file was transferred. conn_uids: set[string] &default=string_set() &log; ## An identification of the source of the file data. E.g. it ## may be a network protocol over which it was transferred, or a ## local file path which was read, or some other input source. source: string &log &optional; ## A value to represent the depth of this file in relation ## to its source. In SMTP, it is the depth of the MIME ## attachment on the message. In HTTP, it is the depth of the ## request within the TCP connection. depth: count &default=0 &log; ## A set of analysis types done during the file analysis. analyzers: set[string] &default=string_set() &log; ## A mime type provided by the strongest file magic signature ## match against the *bof_buffer* field of :bro:see:`fa_file`, ## or in the cases where no buffering of the beginning of file ## occurs, an initial guess of the mime type based on the first ## data seen. mime_type: string &log &optional; ## A filename for the file if one is available from the source ## for the file. These will frequently come from ## "Content-Disposition" headers in network protocols. filename: string &log &optional; ## The duration the file was analyzed for. duration: interval &log &default=0secs; ## If the source of this file is a network connection, this field ## indicates if the data originated from the local network or not as ## determined by the configured :bro:see:`Site::local_nets`. local_orig: bool &log &optional; ## If the source of this file is a network connection, this field ## indicates if the file is being sent by the originator of the ## connection or the responder. is_orig: bool &log &optional; ## Number of bytes provided to the file analysis engine for the file. seen_bytes: count &log &default=0; ## Total number of bytes that are supposed to comprise the full file. total_bytes: count &log &optional; ## The number of bytes in the file stream that were completely missed ## during the process of analysis e.g. due to dropped packets. missing_bytes: count &log &default=0; ## The number of bytes in the file stream that were not delivered to ## stream file analyzers. This could be overlapping bytes or ## bytes that couldn't be reassembled. overflow_bytes: count &log &default=0; ## Whether the file analysis timed out at least once for the file. timedout: bool &log &default=F; ## Identifier associated with a container file from which this one was ## extracted as part of the file analysis. parent_fuid: string &log &optional; } &redef; ## A table that can be used to disable file analysis completely for ## any files transferred over given network protocol analyzers. const disable: table[Files::Tag] of bool = table() &redef; ## The salt concatenated to unique file handle strings generated by ## :bro:see:`get_file_handle` before hashing them in to a file id ## (the *id* field of :bro:see:`fa_file`). ## Provided to help mitigate the possibility of manipulating parts of ## network connections that factor in to the file handle in order to ## generate two handles that would hash to the same file id. const salt = "I recommend changing this." &redef; ## Decide if you want to automatically attached analyzers to ## files based on the detected mime type of the file. const analyze_by_mime_type_automatically = T &redef; ## The default setting for file reassembly. const enable_reassembler = T &redef; ## The default per-file reassembly buffer size. const reassembly_buffer_size = 524288 &redef; ## Allows the file reassembler to be used if it's necessary because the ## file is transferred out of order. ## ## f: the file. global enable_reassembly: function(f: fa_file); ## Disables the file reassembler on this file. If the file is not ## transferred out of order this will have no effect. ## ## f: the file. global disable_reassembly: function(f: fa_file); ## Set the maximum size the reassembly buffer is allowed to grow ## for the given file. ## ## f: the file. ## ## max: Maximum allowed size of the reassembly buffer. global set_reassembly_buffer_size: function(f: fa_file, max: count); ## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is ## used to determine the length of inactivity that is allowed for a file ## before internal state related to it is cleaned up. When used within ## a :bro:see:`file_timeout` handler, the analysis will delay timing out ## again for the period specified by *t*. ## ## f: the file. ## ## t: the amount of time the file can remain inactive before discarding. ## ## Returns: true if the timeout interval was set, or false if analysis ## for the file isn't currently active. global set_timeout_interval: function(f: fa_file, t: interval): bool; ## Adds an analyzer to the analysis of a given file. ## ## f: the file. ## ## tag: the analyzer type. ## ## args: any parameters the analyzer takes. ## ## Returns: true if the analyzer will be added, or false if analysis ## for the file isn't currently active or the *args* ## were invalid for the analyzer type. global add_analyzer: function(f: fa_file, tag: Files::Tag, args: AnalyzerArgs &default=AnalyzerArgs()): bool; ## Removes an analyzer from the analysis of a given file. ## ## f: the file. ## ## tag: the analyzer type. ## ## args: the analyzer (type and args) to remove. ## ## Returns: true if the analyzer will be removed, or false if analysis ## for the file isn't currently active. global remove_analyzer: function(f: fa_file, tag: Files::Tag, args: AnalyzerArgs &default=AnalyzerArgs()): bool; ## Stops/ignores any further analysis of a given file. ## ## f: the file. ## ## Returns: true if analysis for the given file will be ignored for the ## rest of its contents, or false if analysis for the file ## isn't currently active. global stop: function(f: fa_file): bool; ## Translates a file analyzer enum value to a string with the ## analyzer's name. ## ## tag: The analyzer tag. ## ## Returns: The analyzer name corresponding to the tag. global analyzer_name: function(tag: Files::Tag): string; ## Provides a text description regarding metadata of the file. ## For example, with HTTP it would return a URL. ## ## f: The file to be described. ## ## Returns: a text description regarding metadata of the file. global describe: function(f: fa_file): string; type ProtoRegistration: record { ## A callback to generate a file handle on demand when ## one is needed by the core. get_file_handle: function(c: connection, is_orig: bool): string; ## A callback to "describe" a file. In the case of an HTTP ## transfer the most obvious description would be the URL. ## It's like an extremely compressed version of the normal log. describe: function(f: fa_file): string &default=function(f: fa_file): string { return ""; }; }; ## Register callbacks for protocols that work with the Files framework. ## The callbacks must uniquely identify a file and each protocol can ## only have a single callback registered for it. ## ## tag: Tag for the protocol analyzer having a callback being registered. ## ## reg: A :bro:see:`Files::ProtoRegistration` record. ## ## Returns: true if the protocol being registered was not previously registered. global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool; ## Register a callback for file analyzers to use if they need to do some ## manipulation when they are being added to a file before the core code ## takes over. This is unlikely to be interesting for users and should ## only be called by file analyzer authors but is *not required*. ## ## tag: Tag for the file analyzer. ## ## callback: Function to execute when the given file analyzer is being added. global register_analyzer_add_callback: function(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs)); ## Registers a set of MIME types for an analyzer. If a future connection on one of ## these types is seen, the analyzer will be automatically assigned to parsing it. ## The function *adds* to all MIME types already registered, it doesn't replace ## them. ## ## tag: The tag of the analyzer. ## ## mts: The set of MIME types, each in the form "foo/bar" (case-insensitive). ## ## Returns: True if the MIME types were successfully registered. global register_for_mime_types: function(tag: Files::Tag, mts: set[string]) : bool; ## Registers a MIME type for an analyzer. If a future file with this type is seen, ## the analyzer will be automatically assigned to parsing it. The function *adds* ## to all MIME types already registered, it doesn't replace them. ## ## tag: The tag of the analyzer. ## ## mt: The MIME type in the form "foo/bar" (case-insensitive). ## ## Returns: True if the MIME type was successfully registered. global register_for_mime_type: function(tag: Files::Tag, mt: string) : bool; ## Returns a set of all MIME types currently registered for a specific analyzer. ## ## tag: The tag of the analyzer. ## ## Returns: The set of MIME types. global registered_mime_types: function(tag: Files::Tag) : set[string]; ## Returns a table of all MIME-type-to-analyzer mappings currently registered. ## ## Returns: A table mapping each analyzer to the set of MIME types ## registered for it. global all_registered_mime_types: function() : table[Files::Tag] of set[string]; ## Event that can be handled to access the Info record as it is sent on ## to the logging framework. global log_files: event(rec: Info); } redef record fa_file += { info: Info &optional; }; # Store the callbacks for protocol analyzers that have files. global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table(); # Store the MIME type to analyzer mappings. global mime_types: table[Files::Tag] of set[string]; global mime_type_to_analyzers: table[string] of set[Files::Tag]; global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table(); event bro_init() &priority=5 { Log::create_stream(Files::LOG, [$columns=Info, $ev=log_files, $path="files"]); } function set_info(f: fa_file) { if ( ! f?$info ) { local tmp: Info = Info($ts=f$last_active, $fuid=f$id); f$info = tmp; } if ( f?$parent_id ) f$info$parent_fuid = f$parent_id; if ( f?$source ) f$info$source = f$source; f$info$duration = f$last_active - f$info$ts; f$info$seen_bytes = f$seen_bytes; if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes; f$info$missing_bytes = f$missing_bytes; f$info$overflow_bytes = f$overflow_bytes; if ( f?$is_orig ) f$info$is_orig = f$is_orig; } function set_timeout_interval(f: fa_file, t: interval): bool { return __set_timeout_interval(f$id, t); } function enable_reassembly(f: fa_file) { __enable_reassembly(f$id); } function disable_reassembly(f: fa_file) { __disable_reassembly(f$id); } function set_reassembly_buffer_size(f: fa_file, max: count) { __set_reassembly_buffer(f$id, max); } function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool { add f$info$analyzers[Files::analyzer_name(tag)]; if ( tag in analyzer_add_callbacks ) analyzer_add_callbacks[tag](f, args); if ( ! __add_analyzer(f$id, tag, args) ) { Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id)); return F; } return T; } function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs)) { analyzer_add_callbacks[tag] = callback; } function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool { return __remove_analyzer(f$id, tag, args); } function stop(f: fa_file): bool { return __stop(f$id); } function analyzer_name(tag: Files::Tag): string { return __analyzer_name(tag); } function register_protocol(tag: Analyzer::Tag, reg: ProtoRegistration): bool { local result = (tag !in registered_protocols); registered_protocols[tag] = reg; return result; } function register_for_mime_types(tag: Files::Tag, mime_types: set[string]) : bool { local rc = T; for ( mt in mime_types ) { if ( ! register_for_mime_type(tag, mt) ) rc = F; } return rc; } function register_for_mime_type(tag: Files::Tag, mt: string) : bool { if ( tag !in mime_types ) { mime_types[tag] = set(); } add mime_types[tag][mt]; if ( mt !in mime_type_to_analyzers ) { mime_type_to_analyzers[mt] = set(); } add mime_type_to_analyzers[mt][tag]; return T; } function registered_mime_types(tag: Files::Tag) : set[string] { return tag in mime_types ? mime_types[tag] : set(); } function all_registered_mime_types(): table[Files::Tag] of set[string] { return mime_types; } function describe(f: fa_file): string { local tag = Analyzer::get_tag(f$source); if ( tag !in registered_protocols ) return ""; local handler = registered_protocols[tag]; return handler$describe(f); } event get_file_handle(tag: Files::Tag, c: connection, is_orig: bool) &priority=5 { if ( tag !in registered_protocols ) return; local handler = registered_protocols[tag]; set_file_handle(handler$get_file_handle(c, is_orig)); } event file_new(f: fa_file) &priority=10 { set_info(f); if ( enable_reassembler ) { Files::enable_reassembly(f); Files::set_reassembly_buffer_size(f, reassembly_buffer_size); } } event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10 { set_info(f); add f$info$conn_uids[c$uid]; local cid = c$id; add f$info$tx_hosts[f$is_orig ? cid$orig_h : cid$resp_h]; if( |Site::local_nets| > 0 ) f$info$local_orig=Site::is_local_addr(f$is_orig ? cid$orig_h : cid$resp_h); add f$info$rx_hosts[f$is_orig ? cid$resp_h : cid$orig_h]; } event file_sniff(f: fa_file, meta: fa_metadata) &priority=10 { set_info(f); if ( ! meta?$mime_type ) return; f$info$mime_type = meta$mime_type; if ( analyze_by_mime_type_automatically && meta$mime_type in mime_type_to_analyzers ) { local analyzers = mime_type_to_analyzers[meta$mime_type]; for ( a in analyzers ) { add f$info$analyzers[Files::analyzer_name(a)]; Files::add_analyzer(f, a); } } } event file_timeout(f: fa_file) &priority=10 { set_info(f); f$info$timedout = T; } event file_state_remove(f: fa_file) &priority=10 { set_info(f); } event file_state_remove(f: fa_file) &priority=-10 { Log::write(Files::LOG, f$info); }