@@ -10,6 +10,7 @@ private import semmle.python.Concepts
1010private import semmle.python.dataflow.new.RemoteFlowSources
1111private import semmle.python.dataflow.new.BarrierGuards
1212private import semmle.python.frameworks.data.ModelsAsData
13+ private import semmle.python.ApiGraphs
1314
1415/**
1516 * Provides default sources, sinks and sanitizers for detecting
@@ -42,12 +43,27 @@ module LogInjection {
4243 */
4344 private class ActiveThreatModelSourceAsSource extends Source , ActiveThreatModelSource { }
4445
46+ /**
47+ * Holds if `node` is an argument to a logging call where the format string
48+ * uses `%r` for that argument, which applies `repr()` escaping.
49+ */
50+ private predicate isReprFormattedLoggingArg ( DataFlow:: Node node ) {
51+ exists ( Logging log |
52+ node = log .getAnInput ( ) and
53+ exists ( DataFlow:: Node fmtNode |
54+ fmtNode = log .getAnInput ( ) and
55+ fmtNode .asExpr ( ) .( StringLiteral ) .getText ( ) .regexpMatch ( ".*%r.*" )
56+ )
57+ )
58+ }
59+
4560 /**
4661 * A logging operation, considered as a flow sink.
4762 */
4863 class LoggingAsSink extends Sink {
4964 LoggingAsSink ( ) {
5065 this = any ( Logging write ) .getAnInput ( ) and
66+ not isReprFormattedLoggingArg ( this ) and
5167 // since the inner implementation of the `logging.Logger.warn` function is
5268 // ```py
5369 // class Logger:
@@ -107,6 +123,14 @@ module LogInjection {
107123 }
108124 }
109125
126+ /**
127+ * A call to `repr()`, considered as a sanitizer.
128+ * `repr()` escapes special characters such as newlines, preventing log injection.
129+ */
130+ class ReprCallSanitizer extends Sanitizer , DataFlow:: CallCfgNode {
131+ ReprCallSanitizer ( ) { this = API:: builtin ( "repr" ) .getACall ( ) }
132+ }
133+
110134 /**
111135 * A sanitizer defined via models-as-data with kind "log-injection".
112136 */
0 commit comments